diff -Nurap linux-2.6.9-100.orig/COPYING.SWsoft linux-2.6.9-ve023stab054/COPYING.SWsoft
--- linux-2.6.9-100.orig/COPYING.SWsoft	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/COPYING.SWsoft	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,350 @@
+
+Nothing in this license should be construed as a grant by SWsoft of any rights
+beyond the rights specified in the GNU General Public License, and nothing in
+this license should be construed as a waiver by SWsoft of its patent, copyright
+and/or trademark rights, beyond the waiver required by the GNU General Public
+License. This license is expressly inapplicable to any product that is not
+within the scope of the GNU General Public License
+
+----------------------------------------
+
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff -Nurap linux-2.6.9-100.orig/Documentation/power/swsusp.txt linux-2.6.9-ve023stab054/Documentation/power/swsusp.txt
--- linux-2.6.9-100.orig/Documentation/power/swsusp.txt	2004-10-19 01:53:05.000000000 +0400
+++ linux-2.6.9-ve023stab054/Documentation/power/swsusp.txt	2011-06-15 19:26:18.000000000 +0400
@@ -183,8 +183,8 @@ A: All such kernel threads need to be fi
 where it is safe to be frozen (no kernel semaphores should be held at
 that point and it must be safe to sleep there), and add:
 
-            if (current->flags & PF_FREEZE)
-                    refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
 
 Q: What is the difference between between "platform", "shutdown" and
 "firmware" in /sys/power/disk?
diff -Nurap linux-2.6.9-100.orig/Documentation/ve.txt linux-2.6.9-ve023stab054/Documentation/ve.txt
--- linux-2.6.9-100.orig/Documentation/ve.txt	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/Documentation/ve.txt	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,37 @@
+                        OpenVZ Overview
+                        ---------------
+   (C) SWsoft, 2005, http://www.sw-soft.com, All rights reserved.
+   Licensing governed by "linux/COPYING.SWsoft" file.
+
+OpenVZ is a virtualization technology which allows running multiple
+isolated VPSs (Virtual Private Servers) on a single operating system.
+It uses a single instance of Linux kernel in memory which efficiently
+manages resources between VPSs.
+
+The virtual environment (VE) notion used in the kernel is the original
+name of the more modern notion of a Virtual Private Server (VPS).
+
+From the user's point of view, every VPS is an isolated operating system
+with a private file system, private set of users, private root superuser,
+private set of processes and so on. Any application which does not
+require direct hardware access cannot tell the difference between a VPS
+and a real standalone server.
+
+From the kernel's point of view, a VPS is an isolated set of processes
+spawned from their private 'init' process. The kernel controls which
+resources are accessible inside a VPS and what amount of these resources
+can be consumed/used by VPS processes. The kernel also provides isolation
+between VPSs, thus ensuring that one VPS can't use private resources of
+another VPS, mount a DoS/hack/crash attack on its neighbour and so on.
+
+main Open Virtuozzo config options:
+    CONFIG_FAIRSCHED=y
+    CONFIG_SCHED_VCPU=y
+    CONFIG_VE=y
+    CONFIG_VE_CALLS=m
+    CONFIG_VE_NETDEV=m
+    CONFIG_VE_IPTABLES=y
+
+Official product pages:
+    http://www.virtuozzo.com
+    http://openvz.org
diff -Nurap linux-2.6.9-100.orig/Documentation/vsched.txt linux-2.6.9-ve023stab054/Documentation/vsched.txt
--- linux-2.6.9-100.orig/Documentation/vsched.txt	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/Documentation/vsched.txt	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,83 @@
+Copyright (C) 2005 SWsoft. All rights reserved.
+Licensing governed by "linux/COPYING.SWsoft" file.
+
+Hierarchical CPU schedulers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A hierarchical CPU scheduler is a stack of CPU schedulers which makes it
+possible to organize different scheduling policies in the system and/or
+between groups of processes.
+
+Virtuozzo uses a hierarchical Fair CPU scheduler organized as a 2-stage
+CPU scheduler, where the scheduling decisions are made in 2 steps:
+1. In the first step the Fair CPU scheduler selects a group of processes
+  which should get some CPU time.
+2. Then the standard Linux scheduler chooses a process inside the group.
+Such a scheduler efficiently isolates one group of processes from
+another and still allows a group to use more than 1 CPU on SMP systems.
+
+This document describes a new middle layer of Virtuozzo hierarchical CPU
+scheduler which makes decisions after Fair scheduler, but before Linux
+scheduler and which is called VCPU scheduler.
+
+
+Where does the VCPU scheduler come from?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Existing hierarchical CPU scheduler uses isolated algorithms on each stage
+of decision making, i.e. every scheduler makes its decisions without
+taking into account the details of other schedulers. This can lead to a number
+of problems described below.
+
+On SMP systems there are possible situations when the first CPU scheduler
+in the hierarchy (e.g. Fair scheduler) wants to schedule some group of
+processes on the physical CPU, but the underlying process scheduler
+(e.g. Linux O(1) CPU scheduler) is unable to schedule any processes
+on this physical CPU. Usually this happens due to the fact that Linux
+kernel scheduler uses per-physical CPU runqueues.
+
+Another problem is that the Linux scheduler also knows nothing about
+the Fair scheduler and can't balance efficiently without taking into
+account statistics about process groups from the Fair scheduler. Without
+such statistics the Linux scheduler can concentrate all processes on one
+physical CPU, thus making CPU consumption highly inefficient.
+
+The VCPU scheduler solves these problems by adding a new layer between
+the Fair scheduler and the Linux scheduler.
+
+VCPU scheduler
+~~~~~~~~~~~~~~
+
+The VCPU scheduler is a CPU scheduler which splits the notions of
+physical and virtual CPUs (VCPU and PCPU). This means that tasks are
+running on virtual CPU runqueues, while VCPUs are running on PCPUs.
+
+The Virtuozzo hierarchical fair scheduler becomes a 3-stage CPU scheduler:
+1. First, the Fair CPU scheduler selects a group of processes.
+2. Then the VCPU scheduler selects a virtual CPU to run (this is actually
+  a runqueue).
+3. The standard Linux scheduler chooses a process from the runqueue.
+
+For example on the picture below PCPU0 executes tasks from
+VCPU1 runqueue and PCPU1 is idle:
+
+   virtual          |         physical       |          virtual
+  idle CPUs         |           CPUs         |           CPUs
+--------------------|------------------------|--------------------------
+                    |                        |     -----------------
+                    |                        |    | virtual sched X |
+                    |                        |    |   -----------   |
+                    |                        |    |  |   VCPU0   |  |
+                    |                        |    |   -----------   |
+ ------------       |        -----------          |   -----------   |
+| idle VCPU0 |      |       |   PCPU0   |  <--->  |  |   VCPU1   |  |
+ ------------       |        -----------          |   -----------   |
+                    |                        |     -----------------
+                    |                        |
+                    |                        |     -----------------
+                    |                        |    | virtual sched Y |
+ ------------                -----------     |    |   -----------   |
+| idle VCPU1 |    <--->     |   PCPU1   |    |    |  |   VCPU0   |  |
+ ------------                -----------     |    |   -----------   |
+                    |                        |     -----------------
+                    |                        |
diff -Nurap linux-2.6.9-100.orig/arch/arm/kernel/signal.c linux-2.6.9-ve023stab054/arch/arm/kernel/signal.c
--- linux-2.6.9-100.orig/arch/arm/kernel/signal.c	2004-10-19 01:53:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/arm/kernel/signal.c	2011-06-15 19:26:18.000000000 +0400
@@ -672,9 +672,10 @@ static int do_signal(sigset_t *oldset, s
 	if (!user_mode(regs))
 		return 0;
 
-	if (current->flags & PF_FREEZE) {
-		refrigerator(0);
-		goto no_signal;
+	if (unlikely(test_thread_flag(TIF_FREEZE))) {
+		refrigerator();
+		if (!signal_pending(current))
+			goto no_signal;
 	}
 
 	if (current->ptrace & PT_SINGLESTEP)
diff -Nurap linux-2.6.9-100.orig/arch/i386/Kconfig.debug linux-2.6.9-ve023stab054/arch/i386/Kconfig.debug
--- linux-2.6.9-100.orig/arch/i386/Kconfig.debug	2004-10-19 01:54:08.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/Kconfig.debug	2011-06-15 19:26:19.000000000 +0400
@@ -55,6 +55,14 @@ config 4KSTACKS
 	  on the VM subsystem for higher order allocations. This option
 	  will also use IRQ stacks to compensate for the reduced stackspace.
 
+config NMI_WATCHDOG
+	bool "NMI Watchdog"
+	default y
+	help
+	  If you say Y here the kernel will activate NMI watchdog by default
+	  on boot. You can still activate NMI watchdog via nmi_watchdog
+	  command line option even if you say N here.
+
 config SCHEDSTATS
 	bool "Collect scheduler statistics"
 	depends on DEBUG_KERNEL && PROC_FS
diff -Nurap linux-2.6.9-100.orig/arch/i386/boot/video.S linux-2.6.9-ve023stab054/arch/i386/boot/video.S
--- linux-2.6.9-100.orig/arch/i386/boot/video.S	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/boot/video.S	2011-06-15 19:26:19.000000000 +0400
@@ -123,6 +123,9 @@ video:	pushw	%ds		# We use different seg
 	cmpw	$ASK_VGA, %ax			# Bring up the menu
 	jz	vid2
 
+#ifndef CONFIG_FB
+	mov	$VIDEO_80x25, %ax		# hack to force 80x25 mode
+#endif
 	call	mode_set			# Set the mode
 	jc	vid1
 
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/apic.c linux-2.6.9-ve023stab054/arch/i386/kernel/apic.c
--- linux-2.6.9-100.orig/arch/i386/kernel/apic.c	2011-06-09 19:22:59.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/apic.c	2011-06-15 19:26:19.000000000 +0400
@@ -1016,9 +1016,7 @@ void __init setup_boot_APIC_clock(void)
 
 void __init setup_secondary_APIC_clock(void)
 {
-	local_irq_disable(); /* FIXME: Do we need this? --RR */
 	setup_APIC_timer(calibration_result);
-	local_irq_enable();
 }
 
 void __init disable_APIC_timer(void)
@@ -1143,6 +1141,7 @@ void smp_apic_timer_interrupt(struct pt_
 #endif
 	int		cpu;
 	u32		*isp;
+	struct ve_struct *envid;
 
 	/*
 	 * the NMI deadlock-detector uses this.
@@ -1160,6 +1159,7 @@ void smp_apic_timer_interrupt(struct pt_
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
+	envid = set_exec_env(get_ve0());
 	irq_enter();
 
 #ifdef CONFIG_4KSTACKS
@@ -1186,6 +1186,7 @@ void smp_apic_timer_interrupt(struct pt_
 		smp_local_timer_interrupt(&regs);
 
 	irq_exit();
+	(void)set_exec_env(envid);
 }
 
 /*
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/cpu/common.c linux-2.6.9-ve023stab054/arch/i386/kernel/cpu/common.c
--- linux-2.6.9-100.orig/arch/i386/kernel/cpu/common.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/cpu/common.c	2011-06-15 19:26:18.000000000 +0400
@@ -235,8 +235,6 @@ void __init early_cpu_detect(void)
 			c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
 
 	}
-
-	early_intel_workaround(c);
 }
 
 void __init generic_identify(struct cpuinfo_x86 * c)
@@ -302,6 +300,8 @@ static void __init squash_the_stupid_ser
 		/* Disabling the serial number may affect the cpuid level */
 		c->cpuid_level = cpuid_eax(0);
 	}
+
+	early_intel_workaround(c);
 }
 
 static int __init x86_serial_nr_setup(char *s)
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/cpu/mcheck/mce.c linux-2.6.9-ve023stab054/arch/i386/kernel/cpu/mcheck/mce.c
--- linux-2.6.9-100.orig/arch/i386/kernel/cpu/mcheck/mce.c	2011-06-09 19:22:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/cpu/mcheck/mce.c	2011-06-15 19:26:19.000000000 +0400
@@ -28,7 +28,8 @@ static asmlinkage void unexpected_machin
 }
 
 /* Call the installed machine check handler for this CPU setup. */
-void asmlinkage (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
+void asmlinkage (*machine_check_vector)(struct pt_regs *, long error_code)
+	__attribute__((__section__(".entry.text"))) = unexpected_machine_check;
 
 asmlinkage void do_machine_check(struct pt_regs *regs, long error_code)
 {
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/cpu/mtrr/if.c linux-2.6.9-ve023stab054/arch/i386/kernel/cpu/mtrr/if.c
--- linux-2.6.9-100.orig/arch/i386/kernel/cpu/mtrr/if.c	2004-10-19 01:53:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/cpu/mtrr/if.c	2011-06-15 19:26:19.000000000 +0400
@@ -358,7 +358,7 @@ static int __init mtrr_if_init(void)
 		return -ENODEV;
 
 	proc_root_mtrr =
-	    create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
+	    create_proc_entry("mtrr", S_IWUSR | S_IRUGO, NULL);
 	if (proc_root_mtrr) {
 		proc_root_mtrr->owner = THIS_MODULE;
 		proc_root_mtrr->proc_fops = &mtrr_fops;
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/cpu/proc.c linux-2.6.9-ve023stab054/arch/i386/kernel/cpu/proc.c
--- linux-2.6.9-100.orig/arch/i386/kernel/cpu/proc.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/cpu/proc.c	2011-06-15 19:26:19.000000000 +0400
@@ -4,6 +4,8 @@
 #include <asm/semaphore.h>
 #include <linux/seq_file.h>
 #include <linux/cpufreq.h>
+#include <linux/vsched.h>
+#include <linux/fairsched.h>
 
 #ifdef CONFIG_XEN
 int smp_num_cores = 1;
@@ -71,11 +73,17 @@ static int show_cpuinfo(struct seq_file 
 	struct cpuinfo_x86 *c = v;
 	int i, n = c - cpu_data;
 	int fpu_exception;
+	unsigned long vcpu_khz;
 
 #ifdef CONFIG_SMP
-	if (!cpu_online(n))
+	if (!vcpu_online(n))
 		return 0;
 #endif
+#ifdef CONFIG_FAIRSCHED
+	vcpu_khz = ve_scale_khz(cpu_khz);
+#else
+	vcpu_khz = cpu_khz;
+#endif
 	seq_printf(m, "processor\t: %d\n"
 		"vendor_id\t: %s\n"
 		"cpu family\t: %d\n"
@@ -95,7 +103,7 @@ static int show_cpuinfo(struct seq_file 
 	if ( cpu_has(c, X86_FEATURE_TSC) ) {
 		unsigned int freq = cpufreq_quick_get(n);
 		if (!freq)
-			freq = cpu_khz;
+			freq = vcpu_khz;
 		seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n",
 			freq / 1000, (freq % 1000));
 	}
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/entry.S linux-2.6.9-ve023stab054/arch/i386/kernel/entry.S
--- linux-2.6.9-100.orig/arch/i386/kernel/entry.S	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/entry.S	2011-06-15 19:26:22.000000000 +0400
@@ -1076,4 +1076,27 @@ ENTRY(sys_call_table)
 	.long sys_request_key
 	.long sys_keyctl
 
+	.rept 500-(.-sys_call_table)/4
+		.long sys_ni_syscall
+	.endr
+	.long sys_fairsched_mknod	/* 500 */
+	.long sys_fairsched_rmnod
+	.long sys_fairsched_chwt
+	.long sys_fairsched_mvpr
+	.long sys_fairsched_rate
+	.long sys_fairsched_vcpus
+
+	.rept 510-(.-sys_call_table)/4
+		.long sys_ni_syscall
+	.endr
+
+	.long sys_getluid		/* 510 */
+	.long sys_setluid
+	.long sys_setublimit
+	.long sys_ubstat
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_lchmod		/* 516 */
+	.long sys_lutime
+
 syscall_table_size=(.-sys_call_table)
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/entry_trampoline.c linux-2.6.9-ve023stab054/arch/i386/kernel/entry_trampoline.c
--- linux-2.6.9-100.orig/arch/i386/kernel/entry_trampoline.c	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/entry_trampoline.c	2011-06-15 19:26:19.000000000 +0400
@@ -47,14 +47,14 @@ void __init init_entry_mappings(void)
 	 * set up the initial thread's virtual stack related
 	 * fields:
 	 */
-	for (p = 0; p < ARRAY_SIZE(current->thread.stack_page); p++)
-		current->thread.stack_page[p] = virt_to_page((char *)current->thread_info + (p*PAGE_SIZE));
+	for (p = 0; p < ARRAY_SIZE(current->thread_info->stack_page); p++)
+		current->thread_info->stack_page[p] = virt_to_page((char *)current->thread_info + (p*PAGE_SIZE));
 
 	current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK_TOP);
 
-	for (p = 0; p < ARRAY_SIZE(current->thread.stack_page); p++) {
+	for (p = 0; p < ARRAY_SIZE(current->thread_info->stack_page); p++) {
 		__kunmap_atomic_type(KM_VSTACK_TOP-p);
-		__kmap_atomic(current->thread.stack_page[p], KM_VSTACK_TOP-p);
+		__kmap_atomic(current->thread_info->stack_page[p], KM_VSTACK_TOP-p);
 	}
 #endif
 	current->thread_info->real_stack = (void *)current->thread_info;
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/i386_ksyms.c linux-2.6.9-ve023stab054/arch/i386/kernel/i386_ksyms.c
--- linux-2.6.9-100.orig/arch/i386/kernel/i386_ksyms.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/i386_ksyms.c	2011-06-15 19:26:21.000000000 +0400
@@ -180,6 +180,8 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback);
 extern int memcmp(const void *,const void *,__kernel_size_t);
 EXPORT_SYMBOL(memcmp);
 
+EXPORT_SYMBOL(empty_zero_page);
+
 EXPORT_SYMBOL(register_die_notifier);
 #ifdef CONFIG_HAVE_DEC_LOCK
 EXPORT_SYMBOL(atomic_dec_and_lock);
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/irq.c linux-2.6.9-ve023stab054/arch/i386/kernel/irq.c
--- linux-2.6.9-100.orig/arch/i386/kernel/irq.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/irq.c	2011-06-15 19:26:19.000000000 +0400
@@ -65,6 +65,9 @@ static inline int call_on_stack(void *fu
 	return retval;
 }
 
+#include <ub/beancounter.h>
+#include <ub/ub_task.h>
+
 /*
  * Linux has a controller-independent x86 interrupt architecture.
  * every controller has a 'controller-template', that is used
@@ -241,10 +244,12 @@ asmlinkage int handle_IRQ_event(unsigned
 {
 	int status = 1;	/* Force the "do bottom halves" bit */
 	int ret, retval = 0;
+	struct user_beancounter *ub;
 
 	if (!(action->flags & SA_INTERRUPT))
 		local_irq_enable();
 
+	ub = set_exec_ub(get_ub0());
 	do {
 		ret = action->handler(irq, action->dev_id, regs);
 		if (ret == IRQ_HANDLED)
@@ -252,6 +257,8 @@ asmlinkage int handle_IRQ_event(unsigned
 		retval |= ret;
 		action = action->next;
 	} while (action);
+	(void)set_exec_ub(ub);
+
 	if (status & SA_SAMPLE_RANDOM)
 		add_interrupt_randomness(irq);
 	local_irq_disable();
@@ -600,6 +607,9 @@ asmlinkage unsigned int do_IRQ(struct pt
 	struct irqaction * action;
 	unsigned int status;
 	int overflow = 0;
+	struct ve_struct *envid;
+
+	envid = set_exec_env(get_ve0());
 
 	irq_enter();
 
@@ -735,6 +745,7 @@ out:
 	spin_unlock(&desc->lock);
 
 	irq_exit();
+	(void)set_exec_env(envid);
 
 	return 1;
 }
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/ldt.c linux-2.6.9-ve023stab054/arch/i386/kernel/ldt.c
--- linux-2.6.9-100.orig/arch/i386/kernel/ldt.c	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/ldt.c	2011-06-15 19:26:21.000000000 +0400
@@ -13,6 +13,7 @@
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -44,7 +45,7 @@ static int alloc_ldt(mm_context_t *pc, i
 		int nr = i/PAGE_SIZE;
 		BUG_ON(i >= 64*1024);
 		if (!pc->ldt_pages[nr]) {
-			pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER);
+			pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER|__GFP_UBC);
 			if (!pc->ldt_pages[nr])
 				return -ENOMEM;
 			clear_highpage(pc->ldt_pages[nr]);
@@ -84,6 +85,15 @@ static inline int copy_ldt(mm_context_t 
 	return 0;
 }
 
+static void free_ldt(mm_context_t *mc)
+{
+	int i;
+
+	for (i = 0; i < MAX_LDT_PAGES; i++)
+		if (mc->ldt_pages[i])
+			__free_page(mc->ldt_pages[i]);
+}
+
 /*
  * we do not have to muck with descriptors here, that is
  * done in switch_mm() as needed.
@@ -100,10 +110,13 @@ int init_new_context(struct task_struct 
 	if (old_mm && old_mm->context.size > 0) {
 		down(&old_mm->context.sem);
 		retval = copy_ldt(&mm->context, &old_mm->context);
+		if (retval < 0)
+			free_ldt(&mm->context);
 		up(&old_mm->context.sem);
 	}
 	return retval;
 }
+EXPORT_SYMBOL(init_new_context);
 
 /*
  * No need to lock the MM as we are the last user
@@ -112,10 +125,9 @@ int init_new_context(struct task_struct 
  */
 void destroy_context(struct mm_struct *mm)
 {
-	int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE;
-
-	for (i = 0; i < nr_pages; i++)
-		__free_page(mm->context.ldt_pages[i]);
+	/* we have to free *all* the pages, since alloc_ldt could allocate more pages,
+	   then we have according to size if ENOMEM happened on allocation */
+	free_ldt(&mm->context);
 	mm->context.size = 0;
 }
 
@@ -300,3 +312,4 @@ void load_LDT_nolock(mm_context_t *pc, i
 	set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count);
 	load_LDT_desc();
 }
+EXPORT_SYMBOL(load_LDT_nolock);
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/nmi.c linux-2.6.9-ve023stab054/arch/i386/kernel/nmi.c
--- linux-2.6.9-100.orig/arch/i386/kernel/nmi.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/nmi.c	2011-06-15 19:26:22.000000000 +0400
@@ -34,7 +34,12 @@
 
 #include "mach_traps.h"
 
-unsigned int nmi_watchdog = NMI_NONE;
+#ifdef CONFIG_NMI_WATCHDOG
+#define NMI_DEFAULT NMI_IO_APIC
+#else
+#define NMI_DEFAULT NMI_NONE
+#endif
+unsigned int nmi_watchdog = NMI_DEFAULT;
 extern int unknown_nmi_panic;
 static unsigned int nmi_hz = HZ;
 extern void show_registers(struct pt_regs *regs);
@@ -312,6 +317,21 @@ void touch_nmi_watchdog (void)
 		alert_counter[i] = 0;
 }
 
+static spinlock_t show_regs_lock = SPIN_LOCK_UNLOCKED;
+
+void smp_show_regs(struct pt_regs *regs, void *info)
+{
+	if (regs == NULL)
+		return;
+
+	spin_lock(&show_regs_lock);
+	bust_spinlocks(1);
+	printk("----------- IPI show regs -----------");
+	show_regs(regs);
+	bust_spinlocks(0);
+	spin_unlock(&show_regs_lock);
+}
+
 extern void die_nmi(struct pt_regs *, const char *msg);
 
 int nmi_watchdog_tick (struct pt_regs * regs)
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/process.c linux-2.6.9-ve023stab054/arch/i386/kernel/process.c
--- linux-2.6.9-100.orig/arch/i386/kernel/process.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/process.c	2011-06-15 19:26:22.000000000 +0400
@@ -38,6 +38,7 @@
 #include <linux/ptrace.h>
 #include <linux/mman.h>
 #include <linux/random.h>
+#include <linux/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -57,6 +58,7 @@
 #include <linux/err.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+EXPORT_SYMBOL_GPL(ret_from_fork);
 
 int hlt_counter;
 
@@ -228,11 +230,14 @@ __setup("idle=", idle_setup);
 void show_regs(struct pt_regs * regs)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+	extern int die_counter;
 
 	printk("\n");
-	printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
-	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
-	print_symbol("EIP is at %s\n", regs->eip);
+	printk("Pid: %d, comm: %20s, oopses: %d\n", current->pid, current->comm, die_counter);
+	printk("EIP: %04x:[<%08lx>] CPU: %d, VCPU: %d:%d\n",0xffff & regs->xcs,regs->eip, smp_processor_id(),
+			task_vsched_id(current), task_cpu(current));
+	if (decode_call_traces)
+		print_symbol("EIP is at %s\n", regs->eip);
 
 	if (regs->xcs & 3)
 		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
@@ -256,6 +261,8 @@ void show_regs(struct pt_regs * regs)
 		: "=r" (cr4): "0" (0));
 	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
 	show_trace(NULL, &regs->esp);
+	if (!decode_call_traces)
+		printk(" EIP: [<%08lx>]\n",regs->eip);
 }
 
 EXPORT_SYMBOL_GPL(show_regs);
@@ -283,6 +290,13 @@ int kernel_thread(int (*fn)(void *), voi
 {
 	struct pt_regs regs;
 
+	/* Don't allow kernel_thread() inside VE */
+	if (!ve_allow_kthreads && !ve_is_super(get_exec_env())) {
+		printk("kernel_thread call inside VE\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	memset(&regs, 0, sizeof(regs));
 
 	regs.ebx = (unsigned long) fn;
@@ -389,8 +403,8 @@ int copy_thread(int nr, unsigned long cl
 	 * IMPORTANT: this code relies on the fact that the task
 	 * structure is an THREAD_SIZE aligned piece of physical memory.
 	 */
-	for (i = 0; i < ARRAY_SIZE(p->thread.stack_page); i++)
-		p->thread.stack_page[i] =
+	for (i = 0; i < ARRAY_SIZE(p->thread_info->stack_page); i++)
+		p->thread_info->stack_page[i] =
 				virt_to_page((unsigned long)p->thread_info + (i*PAGE_SIZE));
 
 	p->thread.eip = (unsigned long) ret_from_fork;
@@ -590,9 +604,9 @@ struct task_struct fastcall * __switch_t
 	 * needed because otherwise NMIs could interrupt the
 	 * user-return code with a virtual stack and stale TLBs.)
 	 */
-	for (i = 0; i < ARRAY_SIZE(next->stack_page); i++) {
+	for (i = 0; i < ARRAY_SIZE(next_p->thread_info->stack_page); i++) {
 		__kunmap_atomic_type(KM_VSTACK_TOP-i);
-		__kmap_atomic(next->stack_page[i], KM_VSTACK_TOP-i);
+		__kmap_atomic(next_p->thread_info->stack_page[i], KM_VSTACK_TOP-i);
 	}
 	/*
 	 * NOTE: here we rely on the task being the stack as well
@@ -872,7 +886,7 @@ asmlinkage int sys_get_thread_area(struc
 unsigned long arch_align_stack(unsigned long sp)
 {
 	if (current->flags & PF_RELOCEXEC)
-		sp -= ((get_random_int() % 65536) << 4);
+		sp -= (get_random_int() % 8192);
 	return sp & ~0xf;
 }
 
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/ptrace.c linux-2.6.9-ve023stab054/arch/i386/kernel/ptrace.c
--- linux-2.6.9-100.orig/arch/i386/kernel/ptrace.c	2011-06-09 19:22:47.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/ptrace.c	2011-06-15 19:26:21.000000000 +0400
@@ -241,20 +241,23 @@ asmlinkage int sys_ptrace(long request, 
 	lock_kernel();
 	ret = -EPERM;
 	if (request == PTRACE_TRACEME) {
+		task_lock(current);
 		/* are we already being traced? */
 		if (current->ptrace & PT_PTRACED)
-			goto out;
+			goto out_task_unlock;
 		ret = security_ptrace(current->parent, current);
 		if (ret)
-			goto out;
+			goto out_task_unlock;
 		/* set the ptrace bit in the process flags. */
 		current->ptrace |= PT_PTRACED;
 		ret = 0;
+out_task_unlock:
+		task_unlock(current);
 		goto out;
 	}
 	ret = -ESRCH;
 	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
+	child = find_task_by_pid_ve(pid);
 	if (child)
 		get_task_struct(child);
 	read_unlock(&tasklist_lock);
@@ -540,8 +543,10 @@ void do_syscall_trace(struct pt_regs *re
 		goto out;
 	/* the 0x80 provides a way for the tracing parent to distinguish
 	   between a syscall stop and SIGTRAP delivery */
+	set_pn_state(current, entryexit ? PN_STOP_LEAVE : PN_STOP_ENTRY);
 	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) &&
 				 !test_thread_flag(TIF_SINGLESTEP) ? 0x80 : 0));
+	clear_pn_state(current);
 
 	/*
 	 * this isn't the same as continuing with a signal, but it will do
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/setup.c linux-2.6.9-ve023stab054/arch/i386/kernel/setup.c
--- linux-2.6.9-100.orig/arch/i386/kernel/setup.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/setup.c	2011-06-15 19:26:19.000000000 +0400
@@ -39,6 +39,7 @@
 #include <linux/efi.h>
 #include <linux/init.h>
 #include <linux/edd.h>
+#include <linux/mmzone.h>
 #include <video/edid.h>
 
 #include <asm/apic.h>
@@ -123,6 +124,7 @@ extern void dmi_scan_machine(void);
 extern void generic_apic_probe(char *);
 extern int root_mountflags;
 extern int disable_timer_pin_1;
+extern void check_ioapic(void);
 
 unsigned long saved_videomode;
 
@@ -866,8 +868,11 @@ static void __init parse_cmdline_early (
 			lapic_disable();
 #endif /* CONFIG_X86_LOCAL_APIC */
 #endif /* CONFIG_ACPI_BOOT */
+
+#ifdef CONFIG_X86_IO_APIC
 		else if (!memcmp(from, "disable_timer_pin_1", 19))
 			disable_timer_pin_1 = 1;
+#endif
 
 		/*
 		 * highmem=size forces highmem to be exactly 'size' bytes.
@@ -1168,7 +1173,19 @@ static unsigned long __init setup_memory
 				INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
 			initrd_end = initrd_start+INITRD_SIZE;
 		}
-		else {
+		else if ((max_low_pfn << PAGE_SHIFT) <
+				PAGE_ALIGN(INITRD_START + INITRD_SIZE)) {
+			/* GRUB places initrd as high as possible, so when
+			   VMALLOC_AREA is bigger than std Linux has, such
+			   initrd is inaccessible in normal zone (highmem) */
+
+			/* initrd should be totally in highmem, sorry */
+			BUG_ON(INITRD_START < (max_low_pfn << PAGE_SHIFT));
+
+			initrd_copy = INITRD_SIZE;
+			printk(KERN_ERR "initrd: GRUB workaround enabled\n");
+			/* initrd is copied from highmem in initrd_move() */
+		} else {
 			printk(KERN_ERR "initrd extends beyond end of memory "
 			    "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
 			    INITRD_START + INITRD_SIZE,
@@ -1512,7 +1529,9 @@ void __init setup_arch(char **cmdline_p)
 	 * Parse the ACPI tables for possible boot-time SMP configuration.
 	 */
 	acpi_boot_table_init();
+#ifdef CONFIG_X86_IO_APIC
 	check_ioapic();
+#endif
 	acpi_boot_init();
 
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/signal.c linux-2.6.9-ve023stab054/arch/i386/kernel/signal.c
--- linux-2.6.9-100.orig/arch/i386/kernel/signal.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/signal.c	2011-06-15 19:26:21.000000000 +0400
@@ -26,6 +26,7 @@
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include "sigframe.h"
+#include <linux/module.h>
 
 #define DEBUG_SIG 0
 
@@ -43,6 +44,7 @@ sys_sigsuspend(int history0, int history
 	mask &= _BLOCKABLE;
 	spin_lock_irq(&current->sighand->siglock);
 	saveset = current->blocked;
+	set_sigsuspend_state(current, saveset);
 	siginitset(&current->blocked, mask);
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
@@ -51,8 +53,10 @@ sys_sigsuspend(int history0, int history
 	while (1) {
 		current->state = TASK_INTERRUPTIBLE;
 		schedule();
-		if (do_signal(regs, &saveset))
+		if (do_signal(regs, &saveset)) {
+			clear_sigsuspend_state(current);
 			return -EINTR;
+		}
 	}
 }
 
@@ -71,6 +75,7 @@ sys_rt_sigsuspend(struct pt_regs regs)
 
 	spin_lock_irq(&current->sighand->siglock);
 	saveset = current->blocked;
+	set_sigsuspend_state(current, saveset);
 	current->blocked = newset;
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
@@ -79,8 +84,10 @@ sys_rt_sigsuspend(struct pt_regs regs)
 	while (1) {
 		current->state = TASK_INTERRUPTIBLE;
 		schedule();
-		if (do_signal(&regs, &saveset))
+		if (do_signal(&regs, &saveset)) {
+			clear_sigsuspend_state(current);
 			return -EINTR;
+		}
 	}
 }
 
@@ -590,9 +597,10 @@ int fastcall do_signal(struct pt_regs *r
 	if ((regs->xcs & 3) != 3)
 		return 1;
 
-	if (current->flags & PF_FREEZE) {
-		refrigerator(0);
-		goto no_signal;
+	if (unlikely(test_thread_flag(TIF_FREEZE))) {
+		refrigerator();
+		if (!signal_pending(current))
+			goto no_signal;
 	}
 
 	if (!oldset)
@@ -631,6 +639,7 @@ int fastcall do_signal(struct pt_regs *r
 	}
 	return 0;
 }
+EXPORT_SYMBOL(do_signal);
 
 /*
  * notification of userspace execution resumption
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/smp.c linux-2.6.9-ve023stab054/arch/i386/kernel/smp.c
--- linux-2.6.9-100.orig/arch/i386/kernel/smp.c	2011-06-09 19:22:59.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/smp.c	2011-06-15 19:26:22.000000000 +0400
@@ -19,7 +19,9 @@
 #include <linux/mc146818rtc.h>
 #include <linux/cache.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 
+#include <asm/nmi.h>
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
 #include <asm/desc.h>
@@ -420,6 +422,7 @@ void flush_tlb_mm (struct mm_struct * mm
 
 	preempt_enable();
 }
+EXPORT_SYMBOL(flush_tlb_mm);
 
 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 {
@@ -485,7 +488,8 @@ struct call_data_struct {
 	int wait;
 };
 
-static struct call_data_struct * call_data;
+static struct call_data_struct call_fn_data;
+static struct call_data_struct * call_data = &call_fn_data;
 static struct call_data_struct * saved_call_data;
 
 /*
@@ -566,42 +570,120 @@ int smp_call_function (void (*func) (voi
  * hardware interrupt handler or from a bottom half handler.
  */
 {
-	struct call_data_struct data;
 	int cpus = num_online_cpus()-1;
 
 	if (!cpus)
 		return 0;
 
 	/* Can deadlock when called with interrupts disabled */
-	/* Only if we are waiting for other CPU to ack */
-	WARN_ON(irqs_disabled() && wait >= 0);
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
+	WARN_ON(irqs_disabled());
 
 	spin_lock(&call_lock);
-	call_data = &data;
+	call_fn_data.func = func;
+	call_fn_data.info = info;
+	call_fn_data.wait = wait;
+	atomic_set(&call_fn_data.started, 0);
+	if (wait)
+		atomic_set(&call_fn_data.finished, 0);
 	mb();
-	
+
 	/* Send a message to all other CPUs and wait for them to respond */
 	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 
 	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
+	while (atomic_read(&call_fn_data.started) != cpus)
 		cpu_relax();
 
 	if (wait)
-		while (atomic_read(&data.finished) != cpus)
+		while (atomic_read(&call_fn_data.finished) != cpus)
 			cpu_relax();
 	spin_unlock(&call_lock);
 
 	return 0;
 }
 
+static spinlock_t nmi_call_lock = SPIN_LOCK_UNLOCKED;
+static struct nmi_call_data_struct {
+	smp_nmi_function func;
+	void *info;
+	atomic_t started;
+	atomic_t finished;
+	cpumask_t cpus_called;
+	int wait;
+} *nmi_call_data;
+
+static int smp_nmi_callback(struct pt_regs * regs, int cpu)
+{
+	smp_nmi_function func;
+	void *info;
+	int wait;
+
+	func = nmi_call_data->func;
+	info = nmi_call_data->info;
+	wait = nmi_call_data->wait;
+	/* prevent from calling func() multiple times */
+	if (cpu_test_and_set(cpu, nmi_call_data->cpus_called))
+		return 0;
+	/*
+	 * notify initiating CPU that I've grabbed the data and am
+	 * about to execute the function
+	 */
+	mb();
+	atomic_inc(&nmi_call_data->started);
+	/* at this point the nmi_call_data structure is out of scope */
+	irq_enter();
+	func(regs, info);
+	irq_exit();
+	if (wait)
+		atomic_inc(&nmi_call_data->finished);
+
+	return 0;
+}
+
+/*
+ * This function tries to call func(regs, info) on each cpu.
+ * Func must be fast and non-blocking.
+ * May be called with disabled interrupts and from any context.
+ */
+int smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
+{
+	struct nmi_call_data_struct data;
+	int cpus;
+
+	cpus = num_online_cpus() - 1;
+	if (!cpus)
+		return 0;
+
+	data.func = func;
+	data.info = info;
+	data.wait = wait;
+	atomic_set(&data.started, 0);
+	atomic_set(&data.finished, 0);
+	cpus_clear(data.cpus_called);
+	/* prevent this cpu from calling func if NMI happens */
+	cpu_set(smp_processor_id(), data.cpus_called);
+
+	if (!spin_trylock(&nmi_call_lock))
+		return -1;
+
+	nmi_call_data = &data;
+	set_nmi_ipi_callback(smp_nmi_callback);
+	mb();
+
+	/* Send a message to all other CPUs and wait for them to respond */
+	send_IPI_allbutself(APIC_DM_NMI);
+	while (atomic_read(&data.started) != cpus)
+		barrier();
+
+	unset_nmi_ipi_callback();
+	if (wait)
+		while (atomic_read(&data.finished) != cpus)
+			barrier();
+	spin_unlock(&nmi_call_lock);
+
+	return 0;
+}
+
 static void stop_this_cpu (void * dummy)
 {
 	/*
@@ -619,9 +701,17 @@ static void stop_this_cpu (void * dummy)
  * this function calls the 'stop' function on all other CPUs in the system.
  */
 
+static struct call_data_struct stop_cpus_data = {
+	.func = stop_this_cpu,
+	.info = NULL,
+	.wait = 0,
+};
+
 void smp_send_stop(void)
 {
-	smp_call_function(stop_this_cpu, NULL, 1, 0);
+	call_data = &stop_cpus_data;
+	mb();
+	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 
 	local_irq_disable();
 	disable_local_APIC();
@@ -640,17 +730,24 @@ asmlinkage void smp_reschedule_interrupt
 
 asmlinkage void smp_call_function_interrupt(void)
 {
-	void (*func) (void *info) = call_data->func;
-	void *info = call_data->info;
-	int wait = call_data->wait;
+	struct call_data_struct *data;
+	void (*func) (void *info);
+	void *info;
+	int wait;
 
 	ack_APIC_irq();
+
+	data = call_data;
+	func = data->func;
+	info = data->info;
+	wait = data->wait;
+
 	/*
 	 * Notify initiating CPU that I've grabbed the data and am
 	 * about to execute the function
 	 */
 	mb();
-	atomic_inc(&call_data->started);
+	atomic_inc(&data->started);
 	/*
 	 * At this point the info structure may be out of scope unless wait==1
 	 */
@@ -660,7 +757,7 @@ asmlinkage void smp_call_function_interr
 
 	if (wait) {
 		mb();
-		atomic_inc(&call_data->finished);
+		atomic_inc(&data->finished);
 	}
 }
 
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/smpboot.c linux-2.6.9-ve023stab054/arch/i386/kernel/smpboot.c
--- linux-2.6.9-100.orig/arch/i386/kernel/smpboot.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/smpboot.c	2011-06-15 19:26:22.000000000 +0400
@@ -306,6 +306,8 @@ static void __init synchronize_tsc_bp (v
 	}
 	if (!buggy)
 		printk("passed.\n");
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
 }
 
 static void __init synchronize_tsc_ap (void)
@@ -331,6 +333,8 @@ static void __init synchronize_tsc_ap (v
 		atomic_inc(&tsc_count_stop);
 		while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
 	}
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
 }
 #undef NR_LOOPS
 
@@ -527,6 +531,10 @@ int __init start_secondary(void *unused)
 	set_cpu_sibling_map(smp_processor_id());
 
 	cpu_set(smp_processor_id(), cpu_online_map);
+
+	/* We can take interrupts now: we're officially "up". */
+	local_irq_enable();
+
 	wmb();
 	return cpu_idle();
 }
@@ -855,6 +863,9 @@ static int __init do_boot_cpu(int apicid
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
 	idle->thread.eip = (unsigned long) start_secondary;
+	/* Cosmetic: sleep_time won't be changed afterwards for the idle
+	 * thread;  keep it 0 rather than -cycles. */
+	VE_TASK_INFO(idle)->sleep_time = 0;
 	/* start_eip had better be page-aligned! */
 	start_eip = setup_trampoline();
 
@@ -924,6 +935,7 @@ static int __init do_boot_cpu(int apicid
 		unmap_cpu_to_logical_apicid(cpu);
 		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
 		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+		fini_idle(cpu); /* undo fork_idle() */
 		cpucount--;
 	}
 
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/sys_i386.c linux-2.6.9-ve023stab054/arch/i386/kernel/sys_i386.c
--- linux-2.6.9-100.orig/arch/i386/kernel/sys_i386.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/sys_i386.c	2011-06-15 19:26:19.000000000 +0400
@@ -200,7 +200,7 @@ asmlinkage int sys_uname(struct old_utsn
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err=copy_to_user(name, &ve_utsname, sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
@@ -216,15 +216,15 @@ asmlinkage int sys_olduname(struct oldol
   
   	down_read(&uts_sem);
 	
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+	error = __copy_to_user(name->sysname,ve_utsname.sysname,__OLD_UTS_LEN);
 	error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+	error |= __copy_to_user(name->nodename,ve_utsname.nodename,__OLD_UTS_LEN);
 	error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+	error |= __copy_to_user(name->release,ve_utsname.release,__OLD_UTS_LEN);
 	error |= __put_user(0,name->release+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+	error |= __copy_to_user(name->version,ve_utsname.version,__OLD_UTS_LEN);
 	error |= __put_user(0,name->version+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+	error |= __copy_to_user(name->machine,ve_utsname.machine,__OLD_UTS_LEN);
 	error |= __put_user(0,name->machine+__OLD_UTS_LEN);
 	
 	up_read(&uts_sem);
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/sysenter.c linux-2.6.9-ve023stab054/arch/i386/kernel/sysenter.c
--- linux-2.6.9-100.orig/arch/i386/kernel/sysenter.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/sysenter.c	2011-06-15 19:26:19.000000000 +0400
@@ -15,6 +15,7 @@
 #include <linux/elf.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -51,6 +52,7 @@ extern const char vsyscall_int80_start, 
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
 
 struct page *sysenter_page;
+EXPORT_SYMBOL(sysenter_page);
 
 static int __init sysenter_setup(void)
 {
@@ -79,30 +81,55 @@ static int __init sysenter_setup(void)
 __initcall(sysenter_setup);
 
 extern void SYSENTER_RETURN_OFFSET;
+EXPORT_SYMBOL(SYSENTER_RETURN_OFFSET);
 
 unsigned int vdso_enabled = 0;
 
-void map_vsyscall(void)
+static struct page *special_mapping_nopage(struct vm_area_struct *vma,
+		unsigned long address, int *type)
+{
+	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+
+	if (sysenter_page) {
+		get_page(sysenter_page);
+		return sysenter_page;
+	}
+
+	return NOPAGE_SIGBUS;
+}
+
+struct vm_operations_struct special_mapping_vmops = {
+	.nopage = special_mapping_nopage,
+};
+EXPORT_SYMBOL(special_mapping_vmops);
+
+/*
+ * This is called from binfmt_elf, we create the special vma for the
+ * vDSO and insert it into the mm struct tree.
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm,
+		int executable_stack, unsigned long map_address)
 {
 	struct thread_info *ti = current_thread_info();
 	struct vm_area_struct *vma;
-	unsigned long addr;
+	unsigned long addr = map_address;
 
-	if (unlikely(!vdso_enabled)) {
+	if (unlikely(!vdso_enabled) && map_address == 0) {
 		current->mm->context.vdso = NULL;
-		return;
+		return 0;
 	}
 
 	/*
 	 * Map the vDSO (it will be randomized):
 	 */
 	down_write(&current->mm->mmap_sem);
-	addr = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, 0);
+	addr = do_mmap(NULL, addr, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, 0);
 	current->mm->context.vdso = (void *)addr;
 	ti->sysenter_return = (void *)addr + (long)&SYSENTER_RETURN_OFFSET;
 	if (addr != -1) {
 		vma = find_vma(current->mm, addr);
 		if (vma) {
+			vma->vm_ops = &special_mapping_vmops;
 			pgprot_val(vma->vm_page_prot) &= ~_PAGE_RW;
 			get_page(sysenter_page);
 			install_page(current->mm, vma, addr,
@@ -111,7 +138,9 @@ void map_vsyscall(void)
 		}
 	}
 	up_write(&current->mm->mmap_sem);
+	return 0;
 }
+EXPORT_SYMBOL(arch_setup_additional_pages);
 
 static int __init vdso_setup(char *str)
 {
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/timers/timer_pit.c linux-2.6.9-ve023stab054/arch/i386/kernel/timers/timer_pit.c
--- linux-2.6.9-100.orig/arch/i386/kernel/timers/timer_pit.c	2011-06-09 19:22:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/timers/timer_pit.c	2011-06-15 19:26:18.000000000 +0400
@@ -31,6 +31,9 @@ static int __init init_pit(char* overrid
 				"to PIT\n");
  	init_cpu_khz();
 	count_p = LATCH;
+
+	init_cpu_khz();
+
 	return 0;
 }
 
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/timers/timer_tsc.c linux-2.6.9-ve023stab054/arch/i386/kernel/timers/timer_tsc.c
--- linux-2.6.9-100.orig/arch/i386/kernel/timers/timer_tsc.c	2011-06-09 19:23:05.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/timers/timer_tsc.c	2011-06-15 19:26:19.000000000 +0400
@@ -81,7 +81,7 @@ static int count2; /* counter for mark_o
  * Equal to 2^32 * (1 / (clocks per usec) ).
  * Initialized in time_init.
  */
-static unsigned long fast_gettimeoffset_quotient;
+unsigned long fast_gettimeoffset_quotient;
 
 static unsigned long get_offset_tsc(void)
 {
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/traps.c linux-2.6.9-ve023stab054/arch/i386/kernel/traps.c
--- linux-2.6.9-100.orig/arch/i386/kernel/traps.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/traps.c	2011-06-15 19:26:22.000000000 +0400
@@ -117,9 +117,11 @@ static inline unsigned long print_contex
 #ifdef	CONFIG_FRAME_POINTER
 	while (valid_stack_ptr(tinfo, (void *)ebp)) {
 		addr = *(unsigned long *)(ebp + 4);
-		printk(" [<%08lx>] ", addr);
-		print_symbol("%s", addr);
-		printk("\n");
+		printk(" [<%08lx>]", addr);
+		if (decode_call_traces) {
+			print_symbol(" %s", addr);
+			printk("\n");
+		}
 		ebp = *(unsigned long *)ebp;
 	}
 #else
@@ -127,8 +129,10 @@ static inline unsigned long print_contex
 		addr = *stack++;
 		if (__kernel_text_address(addr)) {
 			printk(" [<%08lx>]", addr);
-			print_symbol(" %s", addr);
-			printk("\n");
+			if (decode_call_traces) {
+				print_symbol(" %s", addr);
+				printk("\n");
+			}
 		}
 	}
 #endif
@@ -158,7 +162,10 @@ void show_trace(struct task_struct *task
 		stack = (unsigned long*)context->previous_esp;
 		if (!stack)
 			break;
-		printk(" =======================\n");
+		if (decode_call_traces)
+			printk(" =======================\n");
+		else
+			printk(" =<ctx>= ");
 	}
 }
 
@@ -182,10 +189,16 @@ void show_stack(struct task_struct *task
 			printk("\n       ");
 		printk("%08lx ", *stack++);
 	}
-	printk("\nCall Trace:\n");
+	printk("\nCall Trace:");
+	if (decode_call_traces)
+		printk("\n");
 	show_trace(task, esp);
+	if (!decode_call_traces)
+		printk("\n");
 }
 
+EXPORT_SYMBOL(show_stack);
+
 /*
  * The architecture-independent dump_stack generator
  */
@@ -194,6 +207,8 @@ void dump_stack(void)
 	unsigned long stack;
 
 	show_trace(current, &stack);
+	if (!decode_call_traces)
+		printk("\n");
 }
 
 EXPORT_SYMBOL(dump_stack);
@@ -213,9 +228,10 @@ void show_registers(struct pt_regs *regs
 		ss = regs->xss & 0xffff;
 	}
 	print_modules();
-	printk("CPU:    %d\nEIP:    %04x:[<%08lx>]    %s VLI\nEFLAGS: %08lx"
+	printk("CPU:    %d, VCPU: %d:%d\nEIP:    %04x:[<%08lx>]    %s VLI\nEFLAGS: %08lx"
 			"   (%s) \n",
-		smp_processor_id(), 0xffff & regs->xcs, regs->eip,
+		smp_processor_id(), task_vsched_id(current), task_cpu(current),
+		0xffff & regs->xcs, regs->eip,
 		print_tainted(), regs->eflags, UTS_RELEASE);
 	print_symbol("EIP is at %s\n", regs->eip);
 	printk("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
@@ -224,8 +240,10 @@ void show_registers(struct pt_regs *regs
 		regs->esi, regs->edi, regs->ebp, esp);
 	printk("ds: %04x   es: %04x   ss: %04x\n",
 		regs->xds & 0xffff, regs->xes & 0xffff, ss);
-	printk("Process %s (pid: %d, threadinfo=%p task=%p)",
-		current->comm, current->pid, current_thread_info(), current);
+	printk("Process %s (pid: %d, veid=%d, threadinfo=%p task=%p)",
+		current->comm, current->pid,
+		VEID(VE_TASK_INFO(current)->owner_env),
+		current_thread_info(), current);
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
@@ -272,9 +290,9 @@ static void handle_BUG(struct pt_regs *r
 		goto no_bug;
 	if (ud2 != 0x0b0f)
 		goto no_bug;
-	if (__direct_get_user(line, (unsigned short *)(eip + 2)))
+	if (__direct_get_user(line, (unsigned short *)(eip + 4)))
 		goto bug;
-	if (__direct_get_user(file, (char **)(eip + 4)) ||
+	if (__direct_get_user(file, (char **)(eip + 7)) ||
 			__direct_get_user(c, file))
 		file = "<bad filename>";
 
@@ -289,6 +307,15 @@ bug:
 	printk("Kernel BUG\n");
 }
 
+static void inline check_kernel_csum_bug(void)
+{
+	if (kernel_text_csum_broken)
+		printk("Kernel code checksum mismatch detected %d times\n",
+			kernel_text_csum_broken);
+}
+
+int die_counter;
+
 void die(const char * str, struct pt_regs * regs, long err)
 {
 	static struct {
@@ -300,7 +327,6 @@ void die(const char * str, struct pt_reg
 		.lock_owner =		-1,
 		.lock_owner_depth =	0
 	};
-	static int die_counter;
 
 	if (die.lock_owner != smp_processor_id()) {
 		console_verbose();
@@ -330,6 +356,7 @@ void die(const char * str, struct pt_reg
 			printk("\n");
 	notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
 		show_registers(regs);
+		check_kernel_csum_bug();
 		try_crashdump(regs);
   	} else
 		printk(KERN_ERR "Recursive die() failure, output suppressed\n");
@@ -679,11 +706,15 @@ void die_nmi (struct pt_regs *regs, cons
 	printk(" on CPU%d, eip %08lx, registers:\n",
 		smp_processor_id(), regs->eip);
 	show_registers(regs);
+	smp_nmi_call_function(smp_show_regs, NULL, 1);
+	bust_spinlocks(1);
 	try_crashdump(regs);
-	printk("console shuts up ...\n");
+	printk("NMI occurs. ");
 	console_silent();
 	spin_unlock(&nmi_print_lock);
 	bust_spinlocks(0);
+	nmi_exit();
+	local_irq_enable();
 	do_exit(SIGSEGV);
 }
 
@@ -725,6 +756,7 @@ static int dummy_nmi_callback(struct pt_
 }
  
 static nmi_callback_t nmi_callback = dummy_nmi_callback;
+static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback;
  
 asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
 {
@@ -738,9 +770,20 @@ asmlinkage void do_nmi(struct pt_regs * 
 	if (!nmi_callback(regs, cpu))
 		default_do_nmi(regs);
 
+	nmi_ipi_callback(regs, cpu);
 	nmi_exit();
 }
 
+void set_nmi_ipi_callback(nmi_callback_t callback)
+{
+	nmi_ipi_callback = callback;
+}
+
+void unset_nmi_ipi_callback(void)
+{
+	nmi_ipi_callback = dummy_nmi_callback;
+}
+
 void set_nmi_callback(nmi_callback_t callback)
 {
 	nmi_callback = callback;
@@ -1079,14 +1122,13 @@ void __init trap_init_virtual_GDT(void)
 
 #ifdef CONFIG_X86_HIGH_ENTRY
 	if (!cpu) {
+		int i;
 		__set_fixmap(FIX_GDT_0, __pa(cpu_gdt_table), PAGE_KERNEL);
 		__set_fixmap(FIX_GDT_1, __pa(cpu_gdt_table) + PAGE_SIZE, PAGE_KERNEL);
-		__set_fixmap(FIX_TSS_0, __pa(init_tss), PAGE_KERNEL);
-		__set_fixmap(FIX_TSS_1, __pa(init_tss) + 1*PAGE_SIZE, PAGE_KERNEL);
-		__set_fixmap(FIX_TSS_2, __pa(init_tss) + 2*PAGE_SIZE, PAGE_KERNEL);
-		__set_fixmap(FIX_TSS_3, __pa(init_tss) + 3*PAGE_SIZE, PAGE_KERNEL);
+		for(i = 0; i < FIX_TSS_COUNT; i++)
+			__set_fixmap(FIX_TSS_0 - i, __pa(init_tss) + i * PAGE_SIZE, PAGE_KERNEL);
 	}
-
+	
 	gdt_desc->address = __fix_to_virt(FIX_GDT_0) + sizeof(cpu_gdt_table[0]) * cpu;
 #else
 	gdt_desc->address = (unsigned long)cpu_gdt_table[cpu];
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/vsyscall-sigreturn.S linux-2.6.9-ve023stab054/arch/i386/kernel/vsyscall-sigreturn.S
--- linux-2.6.9-100.orig/arch/i386/kernel/vsyscall-sigreturn.S	2011-06-09 19:22:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/vsyscall-sigreturn.S	2011-06-15 19:26:19.000000000 +0400
@@ -15,7 +15,7 @@
 */
 
 	.text
-	.org	__kernel_vsyscall+32
+	.org	__kernel_vsyscall+0x100,0x90
 	.globl __kernel_sigreturn
 	.type __kernel_sigreturn,@function
 __kernel_sigreturn:
@@ -27,6 +27,7 @@ __kernel_sigreturn:
 	.size __kernel_sigreturn,.-.LSTART_sigreturn
 
 	.balign 32
+	.org	__kernel_vsyscall+0x200,0x90
 	.globl __kernel_rt_sigreturn
 	.type __kernel_rt_sigreturn,@function
 __kernel_rt_sigreturn:
diff -Nurap linux-2.6.9-100.orig/arch/i386/kernel/vsyscall-sysenter.S linux-2.6.9-ve023stab054/arch/i386/kernel/vsyscall-sysenter.S
--- linux-2.6.9-100.orig/arch/i386/kernel/vsyscall-sysenter.S	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/kernel/vsyscall-sysenter.S	2011-06-15 19:26:19.000000000 +0400
@@ -26,12 +26,12 @@ __kernel_vsyscall:
 	movl %esp,%ebp
 	sysenter
 
-	/* 7: align return point with nop's to make disassembly easier */
-	.space 7,0x90
+	/* 17: align return point with nop's to make disassembly easier */
+	.space 13,0x90
 
-	/* 14: System call restart point is here! (SYSENTER_RETURN_OFFSET-2) */
+	/* 30: System call restart point is here! (SYSENTER_RETURN_OFFSET-2) */
 	jmp .Lenter_kernel
-	/* 16: System call normal return point is here! */
+	/* 32: System call normal return point is here! */
 	.globl SYSENTER_RETURN_OFFSET	/* Symbol used by sysenter.c  */
 SYSENTER_RETURN_OFFSET:
 	pop %ebp
diff -Nurap linux-2.6.9-100.orig/arch/i386/mm/fault.c linux-2.6.9-ve023stab054/arch/i386/mm/fault.c
--- linux-2.6.9-100.orig/arch/i386/mm/fault.c	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/mm/fault.c	2011-06-15 19:26:19.000000000 +0400
@@ -31,32 +31,6 @@
 extern void die(const char *,struct pt_regs *,long);
 
 /*
- * Unlock any spinlocks which will prevent us from getting the
- * message out 
- */
-void bust_spinlocks(int yes)
-{
-	int loglevel_save = console_loglevel;
-
-	if (yes) {
-		oops_in_progress = 1;
-		return;
-	}
-#ifdef CONFIG_VT
-	unblank_screen();
-#endif
-	oops_in_progress = 0;
-	/*
-	 * OK, the message is on the console.  Now we call printk()
-	 * without oops_in_progress set so that printk will give klogd
-	 * a poke.  Hold onto your hats...
-	 */
-	console_loglevel = 15;		/* NMI oopser may have shut the console up */
-	printk(" ");
-	console_loglevel = loglevel_save;
-}
-
-/*
  * Return EIP plus the CS segment base.  The segment limit is also
  * adjusted, clamped to the kernel/user address space (whichever is
  * appropriate), and returned in *eip_limit.
@@ -146,12 +120,16 @@ static inline unsigned long get_segment_
  */
 static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 { 
+	mm_segment_t oldfs;
 	unsigned long limit;
 	unsigned long instr = get_segment_eip (regs, &limit);
 	int scan_more = 1;
 	int prefetch = 0; 
 	int i;
 
+	oldfs = get_fs();
+	if ((regs->xcs & 0xffff) == __KERNEL_CS)
+		set_fs(KERNEL_DS);
 	for (i = 0; scan_more && i < 15; i++) { 
 		unsigned char opcode;
 		unsigned char instr_hi;
@@ -196,6 +174,7 @@ static int __is_prefetch(struct pt_regs 
 			break;
 		} 
 	}
+	set_fs(oldfs);
 	return prefetch;
 }
 
@@ -246,6 +225,20 @@ asmlinkage void do_page_fault(struct pt_
 
 	tsk = current;
 
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	/* Debugging check for stack overflow: is there less than 1KB free? */
+	{
+		long esp;
+
+		__asm__ __volatile__("andl %%esp,%0" :
+					"=r" (esp) : "0" (THREAD_SIZE - 1));
+		if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
+			printk("do_page_fault: stack overflow: %ld\n",
+				esp - sizeof(struct thread_info));
+			dump_stack();
+		}
+	}
+#endif
 	info.si_code = SEGV_MAPERR;
 
 	/*
@@ -359,7 +352,6 @@ good_area:
 				goto bad_area;
 	}
 
- survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -498,14 +490,14 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (tsk->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
+	if (error_code & 4) {
+		/* 
+		 * 0-order allocation always success if something really 
+		 * fatal not happen: beancounter overdraft or OOM. Den 
+		 */
+		force_sig(SIGKILL, tsk);
+		return;
 	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & 4)
-		do_exit(SIGKILL);
 	goto no_context;
 
 do_sigbus:
diff -Nurap linux-2.6.9-100.orig/arch/i386/mm/hugetlbpage.c linux-2.6.9-ve023stab054/arch/i386/mm/hugetlbpage.c
--- linux-2.6.9-100.orig/arch/i386/mm/hugetlbpage.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/mm/hugetlbpage.c	2011-06-15 19:26:19.000000000 +0400
@@ -18,6 +18,8 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
@@ -43,6 +45,7 @@ static void set_huge_pte(struct mm_struc
 	pte_t entry;
 
 	mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+	ub_unused_privvm_dec(mm_ub(mm), HPAGE_SIZE / PAGE_SIZE, vma);
 	if (write_access) {
 		entry =
 		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
@@ -83,6 +86,7 @@ int copy_hugetlb_page_range(struct mm_st
 		get_page(ptepage);
 		set_pte(dst_pte, entry);
 		dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+		ub_unused_privvm_dec(mm_ub(dst), HPAGE_SIZE / PAGE_SIZE, vma);
 		addr += HPAGE_SIZE;
 	}
 	return 0;
@@ -219,6 +223,7 @@ void unmap_hugepage_range(struct vm_area
 		put_page(page);
 	}
 	mm->rss -= (end - start) >> PAGE_SHIFT;
+	ub_unused_privvm_inc(mm_ub(mm), (end - start) >> PAGE_SHIFT, vma);
 	flush_tlb_range(vma, start, end);
 }
 
diff -Nurap linux-2.6.9-100.orig/arch/i386/mm/init.c linux-2.6.9-ve023stab054/arch/i386/mm/init.c
--- linux-2.6.9-100.orig/arch/i386/mm/init.c	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/mm/init.c	2011-06-15 19:26:19.000000000 +0400
@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/efi.h>
+#include <linux/initrd.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -40,8 +41,14 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/desc.h>
+#include <asm/setup.h>
 
-unsigned int __VMALLOC_RESERVE = 128 << 20;
+unsigned int __VMALLOC_RESERVE = 
+#ifdef CONFIG_X86_4G
+			(320 << 20);
+#else
+			(128 << 20);
+#endif
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
@@ -392,9 +399,29 @@ static void clear_mappings(pgd_t *pgd_ba
 	flush_tlb_all();
 }
 
+#ifdef CONFIG_SOFTWARE_SUSPEND
+/*      
+ * Swap suspend & friends need this for resume because things like the intel-agp
+ * driver might have split up a kernel 4MB mapping.
+ */
+char __nosavedata swsusp_pg_dir[PAGE_SIZE]
+	__attribute__ ((aligned (PAGE_SIZE)));
+
+static inline void save_pg_dir(void)
+{
+	memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
+}
+#else   
+static inline void save_pg_dir(void)
+{
+}
+#endif
+
 void zap_low_mappings(void)
 {
 	printk("zapping low mappings.\n");
+
+	save_pg_dir();
 	/*
 	 * Zap initial low-memory mappings.
 	 */
@@ -587,6 +614,37 @@ extern void set_max_mapnr_init(void);
 
 static struct kcore_list kcore_mem, kcore_vmalloc; 
 
+#ifdef CONFIG_BLK_DEV_INITRD
+/*
+ * This function move initrd from highmem to normal zone, if needed.
+ * Note, we have to do it before highmem pages are given to buddy allocator.
+ */
+static void initrd_move(void)
+{
+	unsigned long i, start, off;
+	struct page *page;
+	void *addr;
+
+	if (initrd_copy <= 0)
+		return;
+
+	initrd_start = (unsigned long)
+			alloc_bootmem_low_pages(PAGE_ALIGN(INITRD_SIZE));
+	initrd_end = INITRD_START + initrd_copy;
+	start = (initrd_end - initrd_copy) & PAGE_MASK;
+	off = (initrd_end - initrd_copy) & ~PAGE_MASK;
+	for (i = 0; i < initrd_copy; i += PAGE_SIZE) {
+		page = pfn_to_page((start + i) >> PAGE_SHIFT);
+		addr = kmap_atomic(page, KM_USER0);
+		memcpy((void *)initrd_start + i,
+			addr, PAGE_SIZE);
+		kunmap_atomic(addr, KM_USER0);
+	}
+	initrd_start += off;
+	initrd_end = initrd_start + initrd_copy;
+}
+#endif
+
 void __init mem_init(void)
 {
 	extern int ppro_with_ram_bug(void);
@@ -619,6 +677,9 @@ void __init mem_init(void)
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 #endif
 
+#ifdef CONFIG_BLK_DEV_INITRD
+	initrd_move();
+#endif
 	/* this will put all low memory onto the freelists */
 	totalram_pages += __free_all_bootmem();
 
@@ -673,7 +734,7 @@ void __init pgtable_cache_init(void)
 		pmd_cache = kmem_cache_create("pmd",
 					PTRS_PER_PMD*sizeof(pmd_t),
 					PTRS_PER_PMD*sizeof(pmd_t),
-					0,
+					SLAB_UBC,
 					pmd_ctor,
 					NULL);
 		if (!pmd_cache)
@@ -683,7 +744,7 @@ void __init pgtable_cache_init(void)
 			kpmd_cache = kmem_cache_create("kpmd",
 					PTRS_PER_PMD*sizeof(pmd_t),
 					PTRS_PER_PMD*sizeof(pmd_t),
-					0,
+					SLAB_UBC,
 					kpmd_ctor,
 					NULL);
 			if (!kpmd_cache)
@@ -705,7 +766,7 @@ void __init pgtable_cache_init(void)
 	pgd_cache = kmem_cache_create("pgd",
 				PTRS_PER_PGD*sizeof(pgd_t),
 				PTRS_PER_PGD*sizeof(pgd_t),
-				0,
+				SLAB_UBC,
 				ctor,
 				dtor);
 	if (!pgd_cache)
diff -Nurap linux-2.6.9-100.orig/arch/i386/mm/pgtable.c linux-2.6.9-ve023stab054/arch/i386/mm/pgtable.c
--- linux-2.6.9-100.orig/arch/i386/mm/pgtable.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/i386/mm/pgtable.c	2011-06-15 19:26:19.000000000 +0400
@@ -5,8 +5,10 @@
 #include <linux/config.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
+#include <linux/vmalloc.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
 #include <linux/highmem.h>
@@ -55,6 +57,7 @@ void show_mem(void)
 	printk("%d reserved pages\n",reserved);
 	printk("%d pages shared\n",shared);
 	printk("%d pages swap cached\n",cached);
+	vprintstat();
 }
 
 EXPORT_SYMBOL_GPL(show_mem);
@@ -162,9 +165,10 @@ struct page *pte_alloc_one(struct mm_str
 	struct page *pte;
 
 #ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
+	pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|
+			__GFP_HIGHMEM|__GFP_REPEAT, 0);
 #else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+	pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT, 0);
 #endif
 	if (pte)
 		clear_highpage(pte);
@@ -234,9 +238,10 @@ void pgd_ctor(void *__pgd, kmem_cache_t 
 	unsigned long flags;
 
 	if (PTRS_PER_PMD == 1) {
-		if (TASK_SIZE <= PAGE_OFFSET)
+		if (TASK_SIZE <= PAGE_OFFSET) {
+			memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
 			spin_lock_irqsave(&pgd_lock, flags);
-		else
+		} else
 			memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS],
 				&swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS],
 				NR_SHARED_PMDS*sizeof(pgd_t));
@@ -255,7 +260,6 @@ void pgd_ctor(void *__pgd, kmem_cache_t 
 	else {
 		pgd_list_add(pgd);
 		spin_unlock_irqrestore(&pgd_lock, flags);
-		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
 	}
 }
 
diff -Nurap linux-2.6.9-100.orig/arch/ia64/ia32/binfmt_elf32.c linux-2.6.9-ve023stab054/arch/ia64/ia32/binfmt_elf32.c
--- linux-2.6.9-100.orig/arch/ia64/ia32/binfmt_elf32.c	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/ia32/binfmt_elf32.c	2011-06-15 19:26:19.000000000 +0400
@@ -18,6 +18,8 @@
 #include <asm/param.h>
 #include <asm/signal.h>
 
+#include <ub/ub_vmpages.h>
+
 #include "ia32priv.h"
 #include "elfcore32.h"
 
@@ -140,6 +142,11 @@ ia64_elf32_init (struct pt_regs *regs)
 	 * Install LDT as anonymous memory.  This gives us all-zero segment descriptors
 	 * until a task modifies them via modify_ldt().
 	 */
+	if (ub_memory_charge(mm_ub(current->mm), 
+			PAGE_ALIGN(IA32_LDT_ENTRIES * IA32_LDT_ENTRY_SIZE),
+			VM_WRITE, NULL, UB_SOFT))
+		return;
+
 	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 	if (vma) {
 		memset(vma, 0, sizeof(*vma));
@@ -153,11 +160,18 @@ ia64_elf32_init (struct pt_regs *regs)
 			if (insert_vm_struct(current->mm, vma)) {
 				kmem_cache_free(vm_area_cachep, vma);
 				up_write(&current->mm->mmap_sem);
+				ub_memory_uncharge(mm_ub(current->mm),
+					PAGE_ALIGN(IA32_LDT_ENTRIES *
+						IA32_LDT_ENTRY_SIZE),
+					VM_WRITE, NULL);
 				return;
 			}
 		}
 		up_write(&current->mm->mmap_sem);
-	}
+	} else
+		ub_memory_uncharge(mm_ub(current->mm),
+			PAGE_ALIGN(IA32_LDT_ENTRIES * IA32_LDT_ENTRY_SIZE),
+			VM_WRITE, NULL);
 
 	ia64_psr(regs)->ac = 0;		/* turn off alignment checking */
 	regs->loadrs = 0;
@@ -199,7 +213,7 @@ ia64_elf32_init (struct pt_regs *regs)
 int
 ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
 {
-	unsigned long stack_base;
+	unsigned long stack_base, vm_end, vm_start;
 	struct vm_area_struct *mpnt;
 	struct mm_struct *mm = current->mm;
 	int i, ret;
@@ -212,9 +226,17 @@ ia32_setup_arg_pages (struct linux_binpr
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
+	vm_end = IA32_STACK_TOP;
+	vm_start = PAGE_MASK & (unsigned long)bprm->p;
+
+	ret = -ENOMEM;
+	if (ub_memory_charge(mm_ub(mm), vm_end - vm_start, VM_STACK_FLAGS,
+			NULL, UB_HARD))
+		goto out;
+
 	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 	if (!mpnt)
-		return -ENOMEM;
+		goto out_uncharge;
 
 	memset(mpnt, 0, sizeof(*mpnt));
 
@@ -231,11 +253,8 @@ ia32_setup_arg_pages (struct linux_binpr
 			mpnt->vm_flags = VM_STACK_FLAGS;
 		mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)?
 					PAGE_COPY_EXEC: PAGE_COPY;
-		if ((ret = insert_vm_struct(current->mm, mpnt))) {
-			up_write(&current->mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, mpnt);
-			return ret;
-		}
+		if ((ret = insert_vm_struct(current->mm, mpnt)))
+			goto out_up_free;
 		current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
 	}
 
@@ -254,6 +273,14 @@ ia32_setup_arg_pages (struct linux_binpr
 	current->thread.ppl = ia32_init_pp_list();
 
 	return 0;
+
+out_up_free:
+	up_write(&current->mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, mpnt);
+out_uncharge:
+	ub_memory_uncharge(mm_ub(mm), vm_end - vm_start, VM_STACK_FLAGS, NULL);
+out:
+	return ret;
 }
 
 static void
diff -Nurap linux-2.6.9-100.orig/arch/ia64/ia32/ia32_entry.S linux-2.6.9-ve023stab054/arch/ia64/ia32/ia32_entry.S
--- linux-2.6.9-100.orig/arch/ia64/ia32/ia32_entry.S	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/ia32/ia32_entry.S	2011-06-15 19:26:18.000000000 +0400
@@ -341,7 +341,7 @@ ia32_syscall_table:
 	data8 sys_ni_syscall	/* init_module */
 	data8 sys_ni_syscall	/* delete_module */
 	data8 sys_ni_syscall	/* get_kernel_syms */  /* 130 */
-	data8 sys_quotactl
+	data8 sys32_quotactl
 	data8 sys_getpgid
 	data8 sys_fchdir
 	data8 sys_ni_syscall	/* sys_bdflush */
diff -Nurap linux-2.6.9-100.orig/arch/ia64/ia32/sys_ia32.c linux-2.6.9-ve023stab054/arch/ia64/ia32/sys_ia32.c
--- linux-2.6.9-100.orig/arch/ia64/ia32/sys_ia32.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/ia32/sys_ia32.c	2011-06-15 19:26:19.000000000 +0400
@@ -769,7 +769,7 @@ emulate_mmap (struct file *file, unsigne
 		ia32_set_pp((unsigned int)start, (unsigned int)end, flags);
 		if (start > pstart) {
 			if (flags & MAP_SHARED)
-				printk(KERN_INFO
+				ve_printk(VE_LOG, KERN_INFO
 				       "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n",
 				       current->comm, current->pid, start);
 			ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags,
@@ -782,7 +782,7 @@ emulate_mmap (struct file *file, unsigne
 		}
 		if (end < pend) {
 			if (flags & MAP_SHARED)
-				printk(KERN_INFO
+				ve_printk(VE_LOG, KERN_INFO
 				       "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n",
 				       current->comm, current->pid, end);
 			ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags,
@@ -813,7 +813,7 @@ emulate_mmap (struct file *file, unsigne
 	is_congruent = (flags & MAP_ANONYMOUS) || (offset_in_page(poff) == 0);
 
 	if ((flags & MAP_SHARED) && !is_congruent)
-		printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
+		ve_printk(VE_LOG, KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
 		       "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off);
 
 	DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend,
@@ -1500,7 +1500,7 @@ getreg (struct task_struct *child, int r
 		return __USER_DS;
 	      case PT_CS: return __USER_CS;
 	      default:
-		printk(KERN_ERR "ia32.getreg(): unknown register %d\n", regno);
+		ve_printk(VE_LOG, KERN_ERR "ia32.getreg(): unknown register %d\n", regno);
 		break;
 	}
 	return 0;
@@ -1526,18 +1526,18 @@ putreg (struct task_struct *child, int r
 	      case PT_EFL: child->thread.eflag = value; break;
 	      case PT_DS: case PT_ES: case PT_FS: case PT_GS: case PT_SS:
 		if (value != __USER_DS)
-			printk(KERN_ERR
+			ve_printk(VE_LOG, KERN_ERR
 			       "ia32.putreg: attempt to set invalid segment register %d = %x\n",
 			       regno, value);
 		break;
 	      case PT_CS:
 		if (value != __USER_CS)
-			printk(KERN_ERR
+			ve_printk(VE_LOG, KERN_ERR
 			       "ia32.putreg: attempt to to set invalid segment register %d = %x\n",
 			       regno, value);
 		break;
 	      default:
-		printk(KERN_ERR "ia32.putreg: unknown register %d\n", regno);
+		ve_printk(VE_LOG, KERN_ERR "ia32.putreg: unknown register %d\n", regno);
 		break;
 	}
 }
@@ -1778,7 +1778,7 @@ sys32_ptrace (int request, pid_t pid, un
 
 	ret = -ESRCH;
 	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
+	child = find_task_by_pid_ve(pid);
 	if (child)
 		get_task_struct(child);
 	read_unlock(&tasklist_lock);
@@ -2337,7 +2337,7 @@ sys32_sendfile (int out_fd, int in_fd, i
 	ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *) &of : NULL, count);
 	set_fs(old_fs);
 
-	if (!ret && offset && put_user(of, offset))
+	if (offset && put_user(of, offset))
 		return -EFAULT;
 
 	return ret;
@@ -2669,6 +2669,54 @@ asmlinkage long sys32_waitid(int which, 
 	return copy_siginfo_to_user32(uinfo, &info);
 }
 
+asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
+						qid_t id, void __user *addr)
+{
+	long ret;
+	unsigned int cmds;
+	mm_segment_t old_fs;
+	struct if_dqblk dqblk;
+	struct if32_dqblk {
+		__u32 dqb_bhardlimit[2];
+		__u32 dqb_bsoftlimit[2];
+		__u32 dqb_curspace[2];
+		__u32 dqb_ihardlimit[2];
+		__u32 dqb_isoftlimit[2];
+		__u32 dqb_curinodes[2];
+		__u32 dqb_btime[2];
+		__u32 dqb_itime[2];
+		__u32 dqb_valid;
+	} dqblk32;
+
+	cmds = cmd >> SUBCMDSHIFT;
+
+	switch (cmds) {
+		case Q_GETQUOTA:
+			old_fs = get_fs();
+			set_fs(KERNEL_DS);
+			ret = sys_quotactl(cmd, special, id, &dqblk);
+			set_fs(old_fs);
+			memcpy(&dqblk32, &dqblk, sizeof(dqblk32));
+			dqblk32.dqb_valid = dqblk.dqb_valid;
+			if (copy_to_user(addr, &dqblk32, sizeof(dqblk32)))
+				return -EFAULT;
+			break;
+		case Q_SETQUOTA:
+			if (copy_from_user(&dqblk32, addr, sizeof(dqblk32)))
+				return -EFAULT;
+			memcpy(&dqblk, &dqblk32, sizeof(dqblk32));
+			dqblk.dqb_valid = dqblk32.dqb_valid;
+			old_fs = get_fs();
+			set_fs(KERNEL_DS);
+			ret = sys_quotactl(cmd, special, id, &dqblk);
+			set_fs(old_fs);
+			break;
+		default:
+			return sys32compat_quotactl(cmd, special, id, addr);
+	}
+	return ret;
+}
+
 #ifdef	NOTYET  /* UNTESTED FOR IA64 FROM HERE DOWN */
 
 asmlinkage long sys32_setreuid(compat_uid_t ruid, compat_uid_t euid)
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/asm-offsets.c linux-2.6.9-ve023stab054/arch/ia64/kernel/asm-offsets.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/asm-offsets.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/asm-offsets.c	2011-06-15 19:26:19.000000000 +0400
@@ -44,11 +44,21 @@ void foo(void)
 	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
 	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
 	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
+#ifdef CONFIG_VE
+	DEFINE(IA64_TASK_PID_OFFSET, offsetof
+			(struct task_struct, pids[PIDTYPE_PID].vnr));
+#else
 	DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
+#endif
 	DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
 	DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
 	DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
+#ifdef CONFIG_VE
+	DEFINE(IA64_TASK_TGID_OFFSET, offsetof
+			(struct task_struct, pids[PIDTYPE_TGID].vnr));
+#else
 	DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
+#endif
 	DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
 	DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
 	DEFINE(IA64_THREAD_INFO_CPU_OFFSET, offsetof (struct thread_info, cpu));
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/entry.S linux-2.6.9-ve023stab054/arch/ia64/kernel/entry.S
--- linux-2.6.9-100.orig/arch/ia64/kernel/entry.S	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/entry.S	2011-06-15 19:26:22.000000000 +0400
@@ -1586,5 +1586,20 @@ sys_call_table:
 	data8 sys_ni_syscall
 	data8 sys_ni_syscall
 	data8 sys_getcpu		// 1304
+.rept 1499-1305
+	data8 sys_ni_syscall		// 1305 - 1498
+.endr
+	data8 sys_fairsched_vcpus
+	data8 sys_fairsched_mknod	// 1500
+	data8 sys_fairsched_rmnod
+	data8 sys_fairsched_chwt
+	data8 sys_fairsched_mvpr
+	data8 sys_fairsched_rate
+	data8 sys_getluid		// 1505
+	data8 sys_setluid
+	data8 sys_setublimit
+	data8 sys_ubstat
+	data8 sys_lchmod
+	data8 sys_lutime			// 1510
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/entry.h linux-2.6.9-ve023stab054/arch/ia64/kernel/entry.h
--- linux-2.6.9-100.orig/arch/ia64/kernel/entry.h	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/entry.h	2011-06-15 19:26:18.000000000 +0400
@@ -60,7 +60,7 @@
 	.spillsp @priunat,SW(AR_UNAT)+16+(off);					\
 	.spillsp ar.rnat,SW(AR_RNAT)+16+(off);					\
 	.spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off);				\
-	.spillsp pr,SW(PR)+16+(off))
+	.spillsp pr,SW(PR)+16+(off)
 
 #define DO_SAVE_SWITCH_STACK			\
 	movl r28=1f;				\
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/fsys.S linux-2.6.9-ve023stab054/arch/ia64/kernel/fsys.S
--- linux-2.6.9-100.orig/arch/ia64/kernel/fsys.S	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/fsys.S	2011-06-15 19:26:19.000000000 +0400
@@ -74,6 +74,7 @@ ENTRY(fsys_getpid)
 	FSYS_RETURN
 END(fsys_getpid)
 
+#ifndef CONFIG_VE
 ENTRY(fsys_getppid)
 	.prologue
 	.altrp b6
@@ -120,6 +121,7 @@ ENTRY(fsys_getppid)
 #endif
 	FSYS_RETURN
 END(fsys_getppid)
+#endif
 
 ENTRY(fsys_set_tid_address)
 	.prologue
@@ -701,7 +703,11 @@ fsyscall_table:
 	data8 0				// chown
 	data8 0				// lseek		// 1040
 	data8 fsys_getpid		// getpid
+#ifdef CONFIG_VE
+	data8 0				// getppid
+#else
 	data8 fsys_getppid		// getppid
+#endif
 	data8 0				// mount
 	data8 0				// umount
 	data8 0				// setuid		// 1045
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/ia64_ksyms.c linux-2.6.9-ve023stab054/arch/ia64/kernel/ia64_ksyms.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/ia64_ksyms.c	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/ia64_ksyms.c	2011-06-15 19:26:21.000000000 +0400
@@ -95,6 +95,8 @@ EXPORT_SYMBOL(xor_ia64_4);
 EXPORT_SYMBOL(xor_ia64_5);
 #endif
 
+EXPORT_SYMBOL(empty_zero_page);
+
 #include <asm/pal.h>
 EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
 EXPORT_SYMBOL(ia64_pal_call_phys_static);
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/irq.c linux-2.6.9-ve023stab054/arch/ia64/kernel/irq.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/irq.c	2004-10-19 01:55:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/irq.c	2011-06-15 19:26:19.000000000 +0400
@@ -56,6 +56,8 @@
 #include <asm/delay.h>
 #include <asm/irq.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_task.h>
 
 /*
  * Linux has a controller-independent x86 interrupt architecture.
@@ -258,10 +260,12 @@ int handle_IRQ_event(unsigned int irq,
 {
 	int status = 1;	/* Force the "do bottom halves" bit */
 	int ret, retval = 0;
+	struct user_beancounter *ub;
 
 	if (!(action->flags & SA_INTERRUPT))
 		local_irq_enable();
 
+	ub = set_exec_ub(get_ub0());
 	do {
 		ret = action->handler(irq, action->dev_id, regs);
 		if (ret == IRQ_HANDLED)
@@ -269,6 +273,7 @@ int handle_IRQ_event(unsigned int irq,
 		retval |= ret;
 		action = action->next;
 	} while (action);
+	(void)set_exec_ub(ub);
 	if (status & SA_SAMPLE_RANDOM)
 		add_interrupt_randomness(irq);
 	local_irq_disable();
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/irq_ia64.c linux-2.6.9-ve023stab054/arch/ia64/kernel/irq_ia64.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/irq_ia64.c	2011-06-09 19:22:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/irq_ia64.c	2011-06-15 19:26:19.000000000 +0400
@@ -120,6 +120,7 @@ void
 ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 {
 	unsigned long saved_tpr;
+	struct ve_struct *ve;
 
 #if IRQ_DEBUG
 	{
@@ -156,6 +157,12 @@ ia64_handle_irq (ia64_vector vector, str
 	 * 16 (without this, it would be ~240, which could easily lead
 	 * to kernel stack overflows).
 	 */
+
+#ifdef CONFIG_HOTPLUG_CPU
+#warning "Fix fixup_irqs & ia64_process_pending_intr to set correct env and ub!"
+#endif
+
+	ve = set_exec_env(get_ve0());
 	irq_enter();
 	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
 	ia64_srlz_d();
@@ -181,6 +188,7 @@ ia64_handle_irq (ia64_vector vector, str
 	 * come through until ia64_eoi() has been done.
 	 */
 	irq_exit();
+	(void)set_exec_env(ve);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/mca.c linux-2.6.9-ve023stab054/arch/ia64/kernel/mca.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/mca.c	2011-06-09 19:22:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/mca.c	2011-06-15 19:26:19.000000000 +0400
@@ -533,13 +533,13 @@ init_handler_platform (pal_min_state_are
 #endif
 	{
 		struct task_struct *g, *t;
-		do_each_thread (g, t) {
+		do_each_thread_all(g, t) {
 			if (t == current)
 				continue;
 
 			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
 			show_stack(t, NULL);
-		} while_each_thread (g, t);
+		} while_each_thread_all(g, t);
 	}
 #ifdef CONFIG_SMP
 	if (!tasklist_lock.write_lock)
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/perfmon.c linux-2.6.9-ve023stab054/arch/ia64/kernel/perfmon.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/perfmon.c	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/perfmon.c	2011-06-15 19:26:19.000000000 +0400
@@ -2631,7 +2631,7 @@ pfm_get_task(pfm_context_t *ctx, pid_t p
 
 		read_lock(&tasklist_lock);
 
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 
 		/* make sure task cannot go away while we operate on it */
 		if (p) get_task_struct(p);
@@ -4186,12 +4186,12 @@ pfm_check_task_exist(pfm_context_t *ctx)
 
 	read_lock(&tasklist_lock);
 
-	do_each_thread (g, t) {
+	do_each_thread_ve(g, t) {
 		if (t->thread.pfm_context == ctx) {
 			ret = 0;
 			break;
 		}
-	} while_each_thread (g, t);
+	} while_each_thread_ve(g, t);
 
 	read_unlock(&tasklist_lock);
 
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/process.c linux-2.6.9-ve023stab054/arch/ia64/kernel/process.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/process.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/process.c	2011-06-15 19:26:22.000000000 +0400
@@ -26,6 +26,7 @@
 #include <linux/efi.h>
 #include <linux/interrupt.h>
 #include <linux/fcntl.h>
+#include <linux/sysctl.h>
 
 #include <asm/cpu.h>
 #include <asm/delay.h>
@@ -85,6 +86,8 @@ show_stack (struct task_struct *task, un
 	}
 }
 
+EXPORT_SYMBOL(show_stack);
+
 void
 dump_stack (void)
 {
@@ -644,6 +647,13 @@ kernel_thread (int (*fn)(void *), void *
 		struct pt_regs pt;
 	} regs;
 
+	/* Don't allow kernel_thread() inside VE */
+	if (!ve_allow_kthreads && !ve_is_super(get_exec_env())) {
+		printk("kernel_thread call inside VE\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	memset(&regs, 0, sizeof(regs));
 	regs.pt.cr_iip = helper_fptr[0];	/* set entry point (IP) */
 	regs.pt.r1 = helper_fptr[1];		/* set GP */
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/ptrace.c linux-2.6.9-ve023stab054/arch/ia64/kernel/ptrace.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/ptrace.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/ptrace.c	2011-06-15 19:26:22.000000000 +0400
@@ -564,7 +564,7 @@ find_thread_for_addr (struct task_struct
 			goto out;
 	} while ((p = next_thread(p)) != child);
 
-	do_each_thread(g, p) {
+	do_each_thread_ve(g, p) {
 		if (child->mm != mm)
 			continue;
 
@@ -572,7 +572,7 @@ find_thread_for_addr (struct task_struct
 			child = p;
 			goto out;
 		}
-	} while_each_thread(g, p);
+	} while_each_thread_ve(g, p);
   out:
 	mmput(mm);
 	return child;
@@ -1292,14 +1292,17 @@ sys_ptrace (long request, pid_t pid, uns
 	lock_kernel();
 	ret = -EPERM;
 	if (request == PTRACE_TRACEME) {
+		task_lock(current);
 		/* are we already being traced? */
 		if (current->ptrace & PT_PTRACED)
-			goto out;
+			goto out_task_unlock;
 		ret = security_ptrace(current->parent, current);
 		if (ret)
-			goto out;
+			goto out_task_unlock;
 		current->ptrace |= PT_PTRACED;
 		ret = 0;
+out_task_unlock:
+		task_unlock(current);
 		goto out;
 	}
 
@@ -1312,7 +1315,7 @@ sys_ptrace (long request, pid_t pid, uns
 	ret = -ESRCH;
 	read_lock(&tasklist_lock);
 	{
-		child = find_task_by_pid(pid);
+		child = find_task_by_pid_ve(pid);
 		if (child) {
 			if (peek_or_poke) {
 				rbs_child = find_thread_for_addr(child, addr);
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/setup.c linux-2.6.9-ve023stab054/arch/ia64/kernel/setup.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/setup.c	2011-06-09 19:22:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/setup.c	2011-06-15 19:26:22.000000000 +0400
@@ -40,6 +40,7 @@
 #include <linux/serial_core.h>
 #include <linux/efi.h>
 #include <linux/initrd.h>
+#include <linux/vsched.h>
 
 #include <asm/ia32.h>
 #include <asm/machvec.h>
@@ -466,6 +467,13 @@ show_cpuinfo (struct seq_file *m, void *
 #endif
 	unsigned long mask;
 	int i;
+	unsigned long vcpu_khz;
+
+#ifdef CONFIG_FAIRSCHED
+	vcpu_khz = ve_scale_khz(c->proc_freq);
+#else
+	vcpu_khz = c->proc_freq;
+#endif
 
 	mask = c->features;
 
@@ -514,7 +522,7 @@ show_cpuinfo (struct seq_file *m, void *
 		   "BogoMIPS   : %lu.%02lu\n",
 		   cpunum, c->vendor, family, c->model, c->revision, c->archrev,
 		   features, c->ppn, c->number,
-		   c->proc_freq / 1000000, c->proc_freq % 1000000,
+		   vcpu_khz / 1000000, vcpu_khz % 1000000,
 		   c->itc_freq / 1000000, c->itc_freq % 1000000,
 		   lpj*HZ/500000, (lpj*HZ/5000) % 100);
 #ifdef CONFIG_SMP
@@ -536,7 +544,7 @@ static void *
 c_start (struct seq_file *m, loff_t *pos)
 {
 #ifdef CONFIG_SMP
-	while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
+	while (*pos < NR_CPUS && !vcpu_online(*pos))
 		++*pos;
 #endif
 	return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/signal.c linux-2.6.9-ve023stab054/arch/ia64/kernel/signal.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/signal.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/signal.c	2011-06-15 19:26:19.000000000 +0400
@@ -269,7 +269,7 @@ ia64_rt_sigreturn (struct sigscratch *sc
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = virt_pid(current);
 	si.si_uid = current->uid;
 	si.si_addr = sc;
 	force_sig_info(SIGSEGV, &si, current);
@@ -374,7 +374,7 @@ force_sigsegv_info (int sig, void __user
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = virt_pid(current);
 	si.si_uid = current->uid;
 	si.si_addr = addr;
 	force_sig_info(SIGSEGV, &si, current);
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/smpboot.c linux-2.6.9-ve023stab054/arch/ia64/kernel/smpboot.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/smpboot.c	2011-06-09 19:22:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/smpboot.c	2011-06-15 19:26:22.000000000 +0400
@@ -432,6 +432,7 @@ do_boot_cpu (int sapicid, int cpu)
 		printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
 		ia64_cpu_to_sapicid[cpu] = -1;
 		cpu_clear(cpu, cpu_online_map);  /* was set in smp_callin() */
+		fini_idle(cpu); /* undo fork_idle() */
 		return -EINVAL;
 	}
 	return 0;
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/time.c linux-2.6.9-ve023stab054/arch/ia64/kernel/time.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/time.c	2011-06-09 19:23:04.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/time.c	2011-06-15 19:26:19.000000000 +0400
@@ -36,6 +36,9 @@ u64 jiffies_64 = INITIAL_JIFFIES;
 
 EXPORT_SYMBOL(jiffies_64);
 
+unsigned int cpu_khz;					/* TSC clocks / usec, not used here */
+EXPORT_SYMBOL(cpu_khz);
+
 #define TIME_KEEPER_ID	0	/* smp_processor_id() of time-keeper */
 
 #ifdef CONFIG_IA64_DEBUG_IRQ
@@ -234,6 +237,8 @@ ia64_init_itm (void)
 		register_time_interpolator(&itc_interpolator);
 	}
 
+	cpu_khz = local_cpu_data->proc_freq / 1000;
+
 	/* Setup the CPU local timer tick */
 	ia64_cpu_local_tick();
 }
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/traps.c linux-2.6.9-ve023stab054/arch/ia64/kernel/traps.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/traps.c	2011-06-09 19:22:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/traps.c	2011-06-15 19:26:18.000000000 +0400
@@ -51,34 +51,6 @@ trap_init (void)
 		fpswa_interface = __va(ia64_boot_param->fpswa);
 }
 
-/*
- * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock
- * is acquired through the console unblank code)
- */
-void
-bust_spinlocks (int yes)
-{
-	int loglevel_save = console_loglevel;
-
-	if (yes) {
-		oops_in_progress = 1;
-		return;
-	}
-
-#ifdef CONFIG_VT
-	unblank_screen();
-#endif
-	oops_in_progress = 0;
-	/*
-	 * OK, the message is on the console.  Now we call printk() without
-	 * oops_in_progress set so that printk will give klogd a poke.  Hold onto
-	 * your hats...
-	 */
-	console_loglevel = 15;		/* NMI oopser may have shut the console up */
-	printk(" ");
-	console_loglevel = loglevel_save;
-}
-
 void
 die (const char *str, struct pt_regs *regs, long err)
 {
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/unaligned.c linux-2.6.9-ve023stab054/arch/ia64/kernel/unaligned.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/unaligned.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/unaligned.c	2011-06-15 19:26:19.000000000 +0400
@@ -1290,7 +1290,7 @@ within_logging_rate_limit (void)
 {
 	static unsigned long count, last_time;
 
-	if (jiffies - last_time > 5*HZ)
+	if (jiffies - last_time > 60*HZ)
 		count = 0;
 	if (++count < 5) {
 		last_time = jiffies;
@@ -1349,7 +1349,7 @@ ia64_handle_unaligned (unsigned long ifa
 			if (user_mode(regs))
 				tty_write_message(current->signal->tty, buf);
 			buf[len-1] = '\0';	/* drop '\r' */
-			printk(KERN_WARNING "%s", buf);	/* watch for command names containing %s */
+			ve_printk(VE_LOG, KERN_WARNING "%s", buf);	/* watch for command names containing %s */
 		} else {
 			if (no_unaligned_warning && !noprint_warning) {
 				noprint_warning = 1;
diff -Nurap linux-2.6.9-100.orig/arch/ia64/kernel/unwind.c linux-2.6.9-ve023stab054/arch/ia64/kernel/unwind.c
--- linux-2.6.9-100.orig/arch/ia64/kernel/unwind.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/kernel/unwind.c	2011-06-15 19:26:18.000000000 +0400
@@ -362,7 +362,7 @@ unw_access_gr (struct unw_frame_info *in
 			if (info->pri_unat_loc)
 				nat_addr = info->pri_unat_loc;
 			else
-				nat_addr = &info->sw->ar_unat;
+				nat_addr = &info->sw->caller_unat;
 			nat_mask = (1UL << ((long) addr & 0x1f8)/8);
 		}
 	} else {
@@ -524,7 +524,7 @@ unw_access_ar (struct unw_frame_info *in
 	      case UNW_AR_UNAT:
 		addr = info->unat_loc;
 		if (!addr)
-			addr = &info->sw->ar_unat;
+			addr = &info->sw->caller_unat;
 		break;
 
 	      case UNW_AR_LC:
@@ -1775,7 +1775,7 @@ run_script (struct unw_script *script, s
 
 		      case UNW_INSN_SETNAT_MEMSTK:
 			if (!state->pri_unat_loc)
-				state->pri_unat_loc = &state->sw->ar_unat;
+				state->pri_unat_loc = &state->sw->caller_unat;
 			/* register off. is a multiple of 8, so the least 3 bits (type) are 0 */
 			s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK;
 			break;
@@ -2241,11 +2241,11 @@ unw_init (void)
 	if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE)
 		unw_hash_index_t_is_too_narrow();
 
-	unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(AR_UNAT);
+	unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(CALLER_UNAT);
 	unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE);
-	unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_UNAT);
+	unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS);
 	unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0);
-	unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(AR_UNAT);
+	unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(CALLER_UNAT);
 	unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR);
 	unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC);
 	unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR);
diff -Nurap linux-2.6.9-100.orig/arch/ia64/lib/memcpy_mck.S linux-2.6.9-ve023stab054/arch/ia64/lib/memcpy_mck.S
--- linux-2.6.9-100.orig/arch/ia64/lib/memcpy_mck.S	2011-06-09 19:22:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/lib/memcpy_mck.S	2011-06-15 19:26:18.000000000 +0400
@@ -634,8 +634,11 @@ END(memcpy)
 	clrrrb
 	;;
 	alloc	saved_pfs_stack=ar.pfs,3,3,3,0
+	cmp.lt	p8,p0=A,r0
 	sub	B = dst0, saved_in0	// how many byte copied so far
 	;;
+(p8)	mov	A = 0;			// A shouldn't be negative, cap it
+	;;
 	sub	C = A, B
 	sub	D = saved_in2, A
 	;;
diff -Nurap linux-2.6.9-100.orig/arch/ia64/mm/discontig.c linux-2.6.9-ve023stab054/arch/ia64/mm/discontig.c
--- linux-2.6.9-100.orig/arch/ia64/mm/discontig.c	2011-06-09 19:22:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/mm/discontig.c	2011-06-15 19:26:19.000000000 +0400
@@ -22,6 +22,7 @@
 #include <asm/meminit.h>
 #include <asm/numa.h>
 #include <asm/sections.h>
+#include <linux/module.h>
 
 /*
  * Track per-node information needed to setup the boot memory allocator, the
diff -Nurap linux-2.6.9-100.orig/arch/ia64/mm/fault.c linux-2.6.9-ve023stab054/arch/ia64/mm/fault.c
--- linux-2.6.9-100.orig/arch/ia64/mm/fault.c	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/mm/fault.c	2011-06-15 19:26:19.000000000 +0400
@@ -16,6 +16,9 @@
 #include <asm/uaccess.h>
 #include <asm/kdebug.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+
 extern void die (char *, struct pt_regs *, long);
 
 /*
@@ -36,6 +39,11 @@ expand_backing_store (struct vm_area_str
 	if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur
 	    || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur))
 		return -ENOMEM;
+
+	if (ub_memory_charge(mm_ub(vma->vm_mm), PAGE_SIZE,
+				vma->vm_flags, vma->vm_file, UB_HARD))
+		return -ENOMEM;
+
 	vma->vm_end += PAGE_SIZE;
 	vma->vm_mm->total_vm += grow;
 	if (vma->vm_flags & VM_LOCKED)
@@ -254,13 +262,13 @@ ia64_do_page_fault (unsigned long addres
 
   out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk(KERN_CRIT "VM: killing process %s\n", current->comm);
-	if (user_mode(regs))
-		do_exit(SIGKILL);
+	if (user_mode(regs)) {
+		/* 
+		 * A 0-order allocation always succeeds unless something really
+		 * fatal has happened: beancounter overdraft or OOM. Den
+		 */
+		force_sig(SIGKILL, current);
+		return;
+	}
 	goto no_context;
 }
diff -Nurap linux-2.6.9-100.orig/arch/ia64/mm/init.c linux-2.6.9-ve023stab054/arch/ia64/mm/init.c
--- linux-2.6.9-100.orig/arch/ia64/mm/init.c	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ia64/mm/init.c	2011-06-15 19:26:19.000000000 +0400
@@ -37,6 +37,8 @@
 #include <asm/unistd.h>
 #include <asm/mca.h>
 
+#include <ub/ub_vmpages.h>
+
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 extern void ia64_tlb_init (void);
@@ -117,6 +119,10 @@ ia64_init_addr_space (void)
 
 	ia64_set_rbs_bot();
 
+	if (ub_memory_charge(mm_ub(current->mm), PAGE_SIZE,
+				VM_DATA_DEFAULT_FLAGS, NULL, UB_SOFT))
+		return;
+
 	/*
 	 * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
 	 * the problem.  When the process attempts to write to the register backing store
@@ -134,10 +140,14 @@ ia64_init_addr_space (void)
 		if (insert_vm_struct(current->mm, vma)) {
 			up_write(&current->mm->mmap_sem);
 			kmem_cache_free(vm_area_cachep, vma);
+			ub_memory_uncharge(mm_ub(current->mm), PAGE_SIZE,
+					VM_DATA_DEFAULT_FLAGS, NULL);
 			return;
 		}
 		up_write(&current->mm->mmap_sem);
-	}
+	} else
+		ub_memory_uncharge(mm_ub(current->mm), PAGE_SIZE,
+				VM_DATA_DEFAULT_FLAGS, NULL);
 
 	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
diff -Nurap linux-2.6.9-100.orig/arch/mips/kernel/irixelf.c linux-2.6.9-ve023stab054/arch/mips/kernel/irixelf.c
--- linux-2.6.9-100.orig/arch/mips/kernel/irixelf.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/mips/kernel/irixelf.c	2011-06-15 19:26:19.000000000 +0400
@@ -448,7 +448,12 @@ static inline int look_for_irix_interpre
 		if (retval < 0)
 			goto out;
 
-		file = open_exec(*name);
+		/*
+		 * I don't understand this loop.
+		 * Are we supposed to break the loop after a successful open and
+		 * read, or close the file, or store it somewhere?  --SAW
+		 */
+		file = open_exec(*name, bprm);
 		if (IS_ERR(file)) {
 			retval = PTR_ERR(file);
 			goto out;
diff -Nurap linux-2.6.9-100.orig/arch/mips/kernel/irixsig.c linux-2.6.9-ve023stab054/arch/mips/kernel/irixsig.c
--- linux-2.6.9-100.orig/arch/mips/kernel/irixsig.c	2004-10-19 01:55:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/mips/kernel/irixsig.c	2011-06-15 19:26:18.000000000 +0400
@@ -182,9 +182,10 @@ asmlinkage int do_irix_signal(sigset_t *
 	if (!user_mode(regs))
 		return 1;
 
-	if (current->flags & PF_FREEZE) {
-		refrigerator(0);
-		goto no_signal;
+	if (unlikely(test_thread_flag(TIF_FREEZE))) {
+		refrigerator();
+		if (!signal_pending(current))
+			goto no_signal;
 	}
 
 	if (!oldset)
diff -Nurap linux-2.6.9-100.orig/arch/mips/kernel/signal.c linux-2.6.9-ve023stab054/arch/mips/kernel/signal.c
--- linux-2.6.9-100.orig/arch/mips/kernel/signal.c	2004-10-19 01:55:37.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/mips/kernel/signal.c	2011-06-15 19:26:18.000000000 +0400
@@ -551,9 +551,10 @@ asmlinkage int do_signal(sigset_t *oldse
 	if (!user_mode(regs))
 		return 1;
 
-	if (current->flags & PF_FREEZE) {
-		refrigerator(0);
-		goto no_signal;
+	if (unlikely(test_thread_flag(TIF_FREEZE))) {
+		refrigerator();
+		if (!signal_pending(current))
+			goto no_signal;
 	}
 
 	if (!oldset)
diff -Nurap linux-2.6.9-100.orig/arch/mips/kernel/signal32.c linux-2.6.9-ve023stab054/arch/mips/kernel/signal32.c
--- linux-2.6.9-100.orig/arch/mips/kernel/signal32.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/mips/kernel/signal32.c	2011-06-15 19:26:18.000000000 +0400
@@ -700,9 +700,10 @@ asmlinkage int do_signal32(sigset_t *old
 	if (!user_mode(regs))
 		return 1;
 
-	if (current->flags & PF_FREEZE) {
-		refrigerator(0);
-		goto no_signal;
+	if (unlikely(test_thread_flag(TIF_FREEZE))) {
+		refrigerator();
+		if (!signal_pending(current))
+			goto no_signal;
 	}
 
 	if (!oldset)
diff -Nurap linux-2.6.9-100.orig/arch/ppc/kernel/ptrace.c linux-2.6.9-ve023stab054/arch/ppc/kernel/ptrace.c
--- linux-2.6.9-100.orig/arch/ppc/kernel/ptrace.c	2011-06-09 19:22:45.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ppc/kernel/ptrace.c	2011-06-15 19:26:18.000000000 +0400
@@ -242,15 +242,18 @@ int sys_ptrace(long request, long pid, l
 
 	lock_kernel();
 	if (request == PTRACE_TRACEME) {
+		task_lock(current);
 		/* are we already being traced? */
 		if (current->ptrace & PT_PTRACED)
-			goto out;
+			goto out_task_unlock;
 		ret = security_ptrace(current->parent, current);
 		if (ret)
-			goto out;
+			goto out_task_unlock;
 		/* set the ptrace bit in the process flags. */
 		current->ptrace |= PT_PTRACED;
 		ret = 0;
+out_task_unlock:
+		task_unlock(current);
 		goto out;
 	}
 	ret = -ESRCH;
diff -Nurap linux-2.6.9-100.orig/arch/ppc64/boot/zlib.c linux-2.6.9-ve023stab054/arch/ppc64/boot/zlib.c
--- linux-2.6.9-100.orig/arch/ppc64/boot/zlib.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ppc64/boot/zlib.c	2011-06-15 19:26:18.000000000 +0400
@@ -1307,7 +1307,7 @@ local int huft_build(
   {
     *t = (inflate_huft *)Z_NULL;
     *m = 0;
-    return Z_OK;
+    return Z_DATA_ERROR;
   }
 
 
diff -Nurap linux-2.6.9-100.orig/arch/ppc64/kernel/ptrace.c linux-2.6.9-ve023stab054/arch/ppc64/kernel/ptrace.c
--- linux-2.6.9-100.orig/arch/ppc64/kernel/ptrace.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ppc64/kernel/ptrace.c	2011-06-15 19:26:18.000000000 +0400
@@ -57,15 +57,18 @@ int sys_ptrace(long request, long pid, l
 
 	lock_kernel();
 	if (request == PTRACE_TRACEME) {
+		task_lock(current);
 		/* are we already being traced? */
 		if (current->ptrace & PT_PTRACED)
-			goto out;
+			goto out_task_unlock;
 		ret = security_ptrace(current->parent, current);
 		if (ret)
-			goto out;
+			goto out_task_unlock;
 		/* set the ptrace bit in the process flags. */
 		current->ptrace |= PT_PTRACED;
 		ret = 0;
+out_task_unlock:
+		task_unlock(current);
 		goto out;
 	}
 	ret = -ESRCH;
diff -Nurap linux-2.6.9-100.orig/arch/ppc64/kernel/ptrace32.c linux-2.6.9-ve023stab054/arch/ppc64/kernel/ptrace32.c
--- linux-2.6.9-100.orig/arch/ppc64/kernel/ptrace32.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/ppc64/kernel/ptrace32.c	2011-06-15 19:26:18.000000000 +0400
@@ -45,15 +45,18 @@ int sys32_ptrace(long request, long pid,
 
 	lock_kernel();
 	if (request == PTRACE_TRACEME) {
+		task_lock(current);
 		/* are we already being traced? */
 		if (current->ptrace & PT_PTRACED)
-			goto out;
+			goto out_task_unlock;
 		ret = security_ptrace(current->parent, current);
 		if (ret)
-			goto out;
+			goto out_task_unlock;
 		/* set the ptrace bit in the process flags. */
 		current->ptrace |= PT_PTRACED;
 		ret = 0;
+out_task_unlock:
+		task_unlock(current);
 		goto out;
 	}
 	ret = -ESRCH;
diff -Nurap linux-2.6.9-100.orig/arch/s390/mm/fault.c linux-2.6.9-ve023stab054/arch/s390/mm/fault.c
--- linux-2.6.9-100.orig/arch/s390/mm/fault.c	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/s390/mm/fault.c	2011-06-15 19:26:18.000000000 +0400
@@ -61,17 +61,9 @@ void bust_spinlocks(int yes)
 	if (yes) {
 		oops_in_progress = 1;
 	} else {
-		int loglevel_save = console_loglevel;
 		oops_in_progress = 0;
 		console_unblank();
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk will give klogd
-		 * a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;
-		printk(" ");
-		console_loglevel = loglevel_save;
+		wake_up_klogd();
 	}
 }
 
diff -Nurap linux-2.6.9-100.orig/arch/sh/kernel/signal.c linux-2.6.9-ve023stab054/arch/sh/kernel/signal.c
--- linux-2.6.9-100.orig/arch/sh/kernel/signal.c	2004-10-19 01:55:36.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/sh/kernel/signal.c	2011-06-15 19:26:18.000000000 +0400
@@ -580,9 +580,10 @@ int do_signal(struct pt_regs *regs, sigs
 	if (!user_mode(regs))
 		return 1;
 
-	if (current->flags & PF_FREEZE) {
-		refrigerator(0);
-		goto no_signal;
+	if (unlikely(test_thread_flag(TIF_FREEZE))) {
+		refrigerator();
+		if (!signal_pending(current))
+			goto no_signal;
 	}
 
 	if (!oldset)
diff -Nurap linux-2.6.9-100.orig/arch/sh64/kernel/signal.c linux-2.6.9-ve023stab054/arch/sh64/kernel/signal.c
--- linux-2.6.9-100.orig/arch/sh64/kernel/signal.c	2004-10-19 01:54:08.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/sh64/kernel/signal.c	2011-06-15 19:26:18.000000000 +0400
@@ -701,10 +701,11 @@ int do_signal(struct pt_regs *regs, sigs
 	if (!user_mode(regs))
 		return 1;
 
-	if (current->flags & PF_FREEZE) {
-		refrigerator(0);
-		goto no_signal;
-		}
+	if (unlikely(test_thread_flag(TIF_FREEZE))) {
+		refrigerator();
+		if (!signal_pending(current))
+			goto no_signal;
+	}
 
 	if (!oldset)
 		oldset = &current->blocked;
diff -Nurap linux-2.6.9-100.orig/arch/sparc/kernel/ptrace.c linux-2.6.9-ve023stab054/arch/sparc/kernel/ptrace.c
--- linux-2.6.9-100.orig/arch/sparc/kernel/ptrace.c	2011-06-09 19:22:45.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/sparc/kernel/ptrace.c	2011-06-15 19:26:18.000000000 +0400
@@ -288,20 +288,23 @@ asmlinkage void do_ptrace(struct pt_regs
 	if(request == PTRACE_TRACEME) {
 		int ret;
 
+		task_lock(current);
 		/* are we already being traced? */
 		if (current->ptrace & PT_PTRACED) {
 			pt_error_return(regs, EPERM);
-			goto out;
+			goto out_task_unlock;
 		}
 		ret = security_ptrace(current->parent, current);
 		if (ret) {
 			pt_error_return(regs, -ret);
-			goto out;
+			goto out_task_unlock;
 		}
 
 		/* set the ptrace bit in the process flags. */
 		current->ptrace |= PT_PTRACED;
 		pt_succ_return(regs, 0);
+out_task_unlock:
+		task_unlock(current);
 		goto out;
 	}
 #ifndef ALLOW_INIT_TRACING
diff -Nurap linux-2.6.9-100.orig/arch/sparc64/kernel/ptrace.c linux-2.6.9-ve023stab054/arch/sparc64/kernel/ptrace.c
--- linux-2.6.9-100.orig/arch/sparc64/kernel/ptrace.c	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/sparc64/kernel/ptrace.c	2011-06-15 19:26:18.000000000 +0400
@@ -139,20 +139,23 @@ asmlinkage void do_ptrace(struct pt_regs
 	if (request == PTRACE_TRACEME) {
 		int ret;
 
+		task_lock(current);
 		/* are we already being traced? */
 		if (current->ptrace & PT_PTRACED) {
 			pt_error_return(regs, EPERM);
-			goto out;
+			goto out_task_unlock;
 		}
 		ret = security_ptrace(current->parent, current);
 		if (ret) {
 			pt_error_return(regs, -ret);
-			goto out;
+			goto out_task_unlock;
 		}
 
 		/* set the ptrace bit in the process flags. */
 		current->ptrace |= PT_PTRACED;
 		pt_succ_return(regs, 0);
+out_task_unlock:
+		task_unlock(current);
 		goto out;
 	}
 #ifndef ALLOW_INIT_TRACING
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/boot/compressed/head.S linux-2.6.9-ve023stab054/arch/x86_64/boot/compressed/head.S
--- linux-2.6.9-100.orig/arch/x86_64/boot/compressed/head.S	2004-10-19 01:55:28.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/boot/compressed/head.S	2011-06-15 19:26:20.000000000 +0400
@@ -35,7 +35,7 @@
 startup_32:
 	cld
 	cli
-	movl $(__KERNEL_DS),%eax
+	movl $(__BOOT_DS),%eax
 	movl %eax,%ds
 	movl %eax,%es
 	movl %eax,%fs
@@ -77,7 +77,7 @@ startup_32:
 	jnz  3f
 	addl $8,%esp
 	xorl %ebx,%ebx
-	ljmp $(__KERNEL_CS), $0x100000
+	ljmp $(__BOOT_CS), $0x100000
 
 /*
  * We come here, if we were loaded high.
@@ -105,7 +105,7 @@ startup_32:
 	popl %eax	# hcount
 	movl $0x100000,%edi
 	cli		# make sure we don't get interrupted
-	ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
+	ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
 
 /*
  * Routine (template) for moving the decompressed kernel in place,
@@ -128,7 +128,7 @@ move_routine_start:
 	movsl
 	movl %ebx,%esi	# Restore setup pointer
 	xorl %ebx,%ebx
-	ljmp $(__KERNEL_CS), $0x100000
+	ljmp $(__BOOT_CS), $0x100000
 move_routine_end:
 
 
@@ -138,5 +138,5 @@ user_stack:	 	
 	.fill 4096,4,0
 stack_start:	
 	.long user_stack+4096
-	.word __KERNEL_DS
+	.word __BOOT_DS
 
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/boot/setup.S linux-2.6.9-ve023stab054/arch/x86_64/boot/setup.S
--- linux-2.6.9-100.orig/arch/x86_64/boot/setup.S	2011-06-09 19:22:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/boot/setup.S	2011-06-15 19:26:20.000000000 +0400
@@ -729,7 +729,7 @@ flush_instr:
 	subw	$DELTA_INITSEG, %si
 	shll	$4, %esi			# Convert to 32-bit pointer
 # NOTE: For high loaded big kernels we need a
-#	jmpi    0x100000,__KERNEL_CS
+#	jmpi    0x100000,__BOOT_CS
 #
 #	but we yet haven't reloaded the CS register, so the default size 
 #	of the target offset still is 16 bit.
@@ -740,7 +740,7 @@ flush_instr:
 	.byte 0x66, 0xea			# prefix + jmpi-opcode
 code32:	.long	0x1000				# will be set to 0x100000
 						# for big kernels
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 # Here's a bunch of information about your current kernel..
 kernel_version:	.ascii	UTS_RELEASE
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/ia32/ia32_aout.c linux-2.6.9-ve023stab054/arch/x86_64/ia32/ia32_aout.c
--- linux-2.6.9-100.orig/arch/x86_64/ia32/ia32_aout.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/ia32/ia32_aout.c	2011-06-15 19:26:19.000000000 +0400
@@ -344,14 +344,14 @@ static int load_aout_binary(struct linux
 		if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
 		    (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
 		{
-			printk(KERN_NOTICE "executable not page aligned\n");
+			ve_printk(VE_LOG, KERN_NOTICE "executable not page aligned\n");
 			error_time2 = jiffies;
 		}
 
 		if ((fd_offset & ~PAGE_MASK) != 0 &&
 		    (jiffies-error_time) > 5*HZ)
 		{
-			printk(KERN_WARNING 
+			ve_printk(VE_LOG, KERN_WARNING 
 			       "fd_offset is not page aligned. Please convert program: %s\n",
 			       bprm->file->f_dentry->d_name.name);
 			error_time = jiffies;
@@ -462,7 +462,7 @@ static int load_aout_library(struct file
 		static unsigned long error_time;
 		if ((jiffies-error_time) > 5*HZ)
 		{
-			printk(KERN_WARNING 
+			ve_printk(VE_LOG, KERN_WARNING 
 			       "N_TXTOFF is not page aligned. Please convert library: %s\n",
 			       file->f_dentry->d_name.name);
 			error_time = jiffies;
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.9-ve023stab054/arch/x86_64/ia32/ia32_binfmt.c
--- linux-2.6.9-100.orig/arch/x86_64/ia32/ia32_binfmt.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/ia32/ia32_binfmt.c	2011-06-15 19:26:20.000000000 +0400
@@ -27,12 +27,14 @@
 #include <asm/ia32.h>
 #include <asm/vsyscall32.h>
 
+#include <ub/ub_vmpages.h>
+
 #define ELF_NAME "elf/i386"
 
 #define AT_SYSINFO 32
 #define AT_SYSINFO_EHDR		33
 
-int sysctl_vsyscall32 = 1;
+int sysctl_vsyscall32 = 0;
 
 #define ARCH_DLINFO do {  \
 	if (sysctl_vsyscall32) { \
@@ -46,7 +48,7 @@ struct elf_phdr; 
 
 #define IA32_EMULATOR 1
 
-#define ELF_ET_DYN_BASE		(TASK_UNMAPPED_32 + 0x1000000)
+#define ELF_ET_DYN_BASE		(TASK_UNMAPPED_BASE + 0x1000000)
 
 #undef ELF_ARCH
 #define ELF_ARCH EM_386
@@ -261,8 +263,7 @@ MODULE_AUTHOR("Eric Youngdale, Andi Klee
 static void elf32_init(struct pt_regs *);
 
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
-#define arch_setup_additional_pages syscall32_setup_pages
-extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
+extern int arch_setup_additional_pages(struct linux_binprm *, int exstack, unsigned long map_address);
 
 #include "../../../fs/binfmt_elf.c" 
 
@@ -288,7 +289,7 @@ static void elf32_init(struct pt_regs *r
 
 int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
 {
-	unsigned long stack_base;
+	unsigned long stack_base, vm_end, vm_start;
 	struct vm_area_struct *mpnt;
 	struct mm_struct *mm = current->mm;
 	int i, ret;
@@ -301,9 +302,17 @@ int setup_arg_pages(struct linux_binprm 
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
+	vm_end = IA32_STACK_TOP;
+	vm_start = PAGE_MASK & (unsigned long)bprm->p;
+
+	ret = -ENOMEM;
+	if (ub_memory_charge(mm_ub(mm), vm_end - vm_start,
+				vm_stack_flags32, NULL, UB_HARD))
+		goto out;
+
 	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!mpnt) 
-		return -ENOMEM; 
+	if (!mpnt)
+		goto out_uncharge;
 
 	memset(mpnt, 0, sizeof(*mpnt));
 
@@ -320,11 +329,8 @@ int setup_arg_pages(struct linux_binprm 
 			mpnt->vm_flags = vm_stack_flags32;
  		mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ? 
  			PAGE_COPY_EXEC : PAGE_COPY;
-		if ((ret = insert_vm_struct(mm, mpnt))) {
-			up_write(&mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, mpnt);
-			return ret;
-		}
+		if ((ret = insert_vm_struct(mm, mpnt)))
+			goto out_up_free;
 		mm->stack_vm = mm->total_vm = vma_pages(mpnt);
 	} 
 
@@ -339,6 +345,15 @@ int setup_arg_pages(struct linux_binprm 
 	up_write(&mm->mmap_sem);
 	
 	return 0;
+
+out_up_free:
+	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, mpnt);
+out_uncharge:
+	ub_memory_uncharge(mm_ub(mm), vm_end - vm_start,
+			vm_stack_flags32, NULL);
+out:
+	return ret;
 }
 
 static unsigned long
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/ia32/ia32_signal.c linux-2.6.9-ve023stab054/arch/x86_64/ia32/ia32_signal.c
--- linux-2.6.9-100.orig/arch/x86_64/ia32/ia32_signal.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/ia32/ia32_signal.c	2011-06-15 19:26:20.000000000 +0400
@@ -123,6 +123,7 @@ sys32_sigsuspend(int history0, int histo
 	mask &= _BLOCKABLE;
 	spin_lock_irq(&current->sighand->siglock);
 	saveset = current->blocked;
+	set_sigsuspend_state(current, saveset);
 	siginitset(&current->blocked, mask);
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
@@ -131,8 +132,10 @@ sys32_sigsuspend(int history0, int histo
 	while (1) {
 		current->state = TASK_INTERRUPTIBLE;
 		schedule();
-		if (do_signal(regs, &saveset))
+		if (do_signal(regs, &saveset)) {
+			clear_sigsuspend_state(current);
 			return -EINTR;
+		}
 	}
 }
 
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/ia32/ia32entry.S linux-2.6.9-ve023stab054/arch/x86_64/ia32/ia32entry.S
--- linux-2.6.9-100.orig/arch/x86_64/ia32/ia32entry.S	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/ia32/ia32entry.S	2011-06-15 19:26:19.000000000 +0400
@@ -76,7 +76,8 @@ ENTRY(ia32_sysenter_target)
 	pushq	$__USER32_DS
 	pushq	%rbp
 	pushfq
-	movl	$VSYSCALL32_SYSEXIT, %r10d
+	GET_THREAD_INFO(%r10)
+	movl	threadinfo_sysenter_return(%r10), %r10d
 	pushq	$__USER32_CS
 	movl	%eax, %eax
 	pushq	%r10
@@ -113,7 +114,7 @@ sysenter_do_call:	
 	xorq    %r11,%r11
 	popfq
 	popq	%rcx				/* User %esp */
-	movl	$VSYSCALL32_SYSEXIT,%edx	/* User %eip */
+	movl	threadinfo_sysenter_return(%r10),%edx /* User %eip */
 	swapgs
 	sti		/* sti only takes effect after the next instruction */
 	/* sysexit */
@@ -459,7 +460,7 @@ ia32_sys_call_table:
 	.quad sys_init_module
 	.quad sys_delete_module
 	.quad quiet_ni_syscall		/* 130  get_kernel_syms */
-	.quad sys32_quotactl		/* quotactl */ 
+	.quad sys32_quotactl
 	.quad sys_getpgid
 	.quad sys_fchdir
 	.quad quiet_ni_syscall	/* bdflush */
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/ia32/ptrace32.c linux-2.6.9-ve023stab054/arch/x86_64/ia32/ptrace32.c
--- linux-2.6.9-100.orig/arch/x86_64/ia32/ptrace32.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/ia32/ptrace32.c	2011-06-15 19:26:19.000000000 +0400
@@ -213,7 +213,7 @@ static struct task_struct *find_target(i
 
 	*err = -ESRCH;
 	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
+	child = find_task_by_pid_ve(pid);
 	if (child)
 		get_task_struct(child);
 	read_unlock(&tasklist_lock);
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/ia32/sys_ia32.c linux-2.6.9-ve023stab054/arch/x86_64/ia32/sys_ia32.c
--- linux-2.6.9-100.orig/arch/x86_64/ia32/sys_ia32.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/ia32/sys_ia32.c	2011-06-15 19:26:22.000000000 +0400
@@ -703,7 +703,7 @@ int sys32_ni_syscall(int call)
 	static char lastcomm[sizeof(me->comm)];
 
 	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		printk(KERN_INFO "IA32 syscall %d from %s not implemented\n",
+		ve_printk(VE_LOG, KERN_INFO "IA32 syscall %d from %s not implemented\n",
 		       call, me->comm);
 		strncpy(lastcomm, me->comm, sizeof(lastcomm));
 	} 
@@ -951,7 +951,7 @@ sys32_sendfile(int out_fd, int in_fd, co
 	ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count);
 	set_fs(old_fs);
 	
-	if (!ret && offset && put_user(of, offset))
+	if (offset && put_user(of, offset))
 		return -EFAULT;
 		
 	return ret;
@@ -1079,13 +1079,13 @@ asmlinkage long sys32_olduname(struct ol
   
   	down_read(&uts_sem);
 	
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+	error = __copy_to_user(&name->sysname,&ve_utsname.sysname,__OLD_UTS_LEN);
 	 __put_user(0,name->sysname+__OLD_UTS_LEN);
-	 __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+	 __copy_to_user(&name->nodename,&ve_utsname.nodename,__OLD_UTS_LEN);
 	 __put_user(0,name->nodename+__OLD_UTS_LEN);
-	 __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+	 __copy_to_user(&name->release,&ve_utsname.release,__OLD_UTS_LEN);
 	 __put_user(0,name->release+__OLD_UTS_LEN);
-	 __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+	 __copy_to_user(&name->version,&ve_utsname.version,__OLD_UTS_LEN);
 	 __put_user(0,name->version+__OLD_UTS_LEN);
 	 { 
 		 char *arch = "x86_64";
@@ -1108,7 +1108,7 @@ long sys32_uname(struct old_utsname __us
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err=copy_to_user(name, &ve_utsname, sizeof (*name));
 	up_read(&uts_sem);
 	if (personality(current->personality) == PER_LINUX32) 
 		err |= copy_to_user(&name->machine, "i686", 5);
@@ -1326,6 +1326,54 @@ asmlinkage long sys32_open(const char __
 	return fd;
 }
 
+asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
+						qid_t id, void __user *addr)
+{
+	long ret;
+	unsigned int cmds;
+	mm_segment_t old_fs;
+	struct if_dqblk dqblk;
+	struct if32_dqblk {
+		__u32 dqb_bhardlimit[2];
+		__u32 dqb_bsoftlimit[2];
+		__u32 dqb_curspace[2];
+		__u32 dqb_ihardlimit[2];
+		__u32 dqb_isoftlimit[2];
+		__u32 dqb_curinodes[2];
+		__u32 dqb_btime[2];
+		__u32 dqb_itime[2];
+		__u32 dqb_valid;
+	} dqblk32;
+
+	cmds = cmd >> SUBCMDSHIFT;
+
+	switch (cmds) {
+		case Q_GETQUOTA:
+			old_fs = get_fs();
+			set_fs(KERNEL_DS);
+			ret = sys_quotactl(cmd, special, id, &dqblk);
+			set_fs(old_fs);
+			memcpy(&dqblk32, &dqblk, sizeof(dqblk32));
+			dqblk32.dqb_valid = dqblk.dqb_valid;
+			if (copy_to_user(addr, &dqblk32, sizeof(dqblk32)))
+				return -EFAULT;
+			break;
+		case Q_SETQUOTA:
+			if (copy_from_user(&dqblk32, addr, sizeof(dqblk32)))
+				return -EFAULT;
+			memcpy(&dqblk, &dqblk32, sizeof(dqblk32));
+			dqblk.dqb_valid = dqblk32.dqb_valid;
+			old_fs = get_fs();
+			set_fs(KERNEL_DS);
+			ret = sys_quotactl(cmd, special, id, &dqblk);
+			set_fs(old_fs);
+			break;
+		default:
+			return sys32compat_quotactl(cmd, special, id, addr);
+	}
+	return ret;
+}
+
 struct sigevent32 { 
 	u32 sigev_value;
 	u32 sigev_signo; 
@@ -1371,25 +1419,13 @@ long sys32_vm86_warning(void)
 	struct task_struct *me = current;
 	static char lastcomm[sizeof(me->comm)];
 	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
+		ve_printk(VE_LOG, KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
 		       me->comm);
 		strncpy(lastcomm, me->comm, sizeof(lastcomm)); 
 	} 
 	return -ENOSYS;
 } 
 
-long sys32_quotactl(void)
-{ 
-	struct task_struct *me = current;
-	static char lastcomm[8];
-	if (strcmp(lastcomm, me->comm)) {
-		printk(KERN_INFO "%s: 32bit quotactl not supported on 64 bit kernel\n",
-		       me->comm);
-		strcpy(lastcomm, me->comm); 
-	} 
-	return -ENOSYS;
-} 
-
 long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
 			  char __user * buf, size_t len)
 {
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/ia32/syscall32.c linux-2.6.9-ve023stab054/arch/x86_64/ia32/syscall32.c
--- linux-2.6.9-100.orig/arch/x86_64/ia32/syscall32.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/ia32/syscall32.c	2011-06-15 19:26:21.000000000 +0400
@@ -4,12 +4,14 @@
    on demand because 32bit cannot reach the kernel's fixmaps */
 
 #include <linux/mm.h>
+#include <linux/mman.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/stringify.h>
 #include <linux/security.h>
+#include <linux/module.h>
 #include <asm/proto.h>
 #include <asm/tlbflush.h>
 #include <asm/ia32_unistd.h>
@@ -30,6 +32,7 @@ extern int sysctl_vsyscall32;
 
 char *syscall32_page; 
 static int use_sysenter __initdata = -1;
+EXPORT_SYMBOL(syscall32_page);
 
 static struct page *
 syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
@@ -44,29 +47,43 @@ static void syscall32_vma_close(struct v
 {
 }
 
-static struct vm_operations_struct syscall32_vm_ops = {
+struct vm_operations_struct syscall32_vm_ops = {
 	.close = syscall32_vma_close,
 	.nopage = syscall32_nopage,
 };
+EXPORT_SYMBOL(syscall32_vm_ops);
 
 struct linux_binprm;
 
 /* Setup a VMA at program startup for the vsyscall page */
-int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack,
+			  unsigned long map_address)
 {
-	int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
+	int npages = (__VSYSCALL32_END - __VSYSCALL32_BASE) >> PAGE_SHIFT;
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
+	unsigned long addr = map_address ? : __VSYSCALL32_BASE;
 	int ret;
 
+	if (sysctl_vsyscall32 == 0 && map_address == 0)
+		return 0;
+
 	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 	if (!vma)
 		return -ENOMEM;
 
+	down_write(&mm->mmap_sem);
+	addr = get_unmapped_area_prot(NULL, addr, PAGE_SIZE * npages, 0,
+			MAP_PRIVATE | MAP_FIXED, PROT_READ | PROT_EXEC);
+	if (unlikely(addr & ~PAGE_MASK)) {
+		ret = addr;
+		goto out;
+	}
+
 	memset(vma, 0, sizeof(struct vm_area_struct));
 	/* Could randomize here */
-	vma->vm_start = VSYSCALL32_BASE;
-	vma->vm_end = VSYSCALL32_END;
+	vma->vm_start = addr;
+	vma->vm_end = addr + PAGE_SIZE * npages;
 	/* MAYWRITE to allow gdb to COW and set breakpoints */
 	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
 	/*
@@ -81,23 +98,28 @@ int syscall32_setup_pages(struct linux_b
 	vma->vm_ops = &syscall32_vm_ops;
 	vma->vm_mm = mm;
 
-	down_write(&mm->mmap_sem);
 	if ((ret = insert_vm_struct(mm, vma))) {
-		up_write(&mm->mmap_sem);
-		kmem_cache_free(vm_area_cachep, vma);
-		return ret;
+		goto out;
 	}
+	mm->context.vdso = (void *)addr;
+	current_thread_info()->sysenter_return = VSYSCALL32_SYSEXIT;
+
 	mm->total_vm += npages;
 	up_write(&mm->mmap_sem);
 	return 0;
+out:
+	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, vma);
+	vm_unacct_memory(npages);
+	return ret;
 }
+EXPORT_SYMBOL(arch_setup_additional_pages);
 
 static int __init init_syscall32(void)
 { 
 	syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); 
 	if (!syscall32_page) 
 		panic("Cannot allocate syscall32 page"); 
-	SetPageReserved(virt_to_page(syscall32_page));
  	if (use_sysenter > 0) {
  		memcpy(syscall32_page, syscall32_sysenter,
  		       syscall32_sysenter_end - syscall32_sysenter);
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/ia32/vsyscall-sysenter.S linux-2.6.9-ve023stab054/arch/x86_64/ia32/vsyscall-sysenter.S
--- linux-2.6.9-100.orig/arch/x86_64/ia32/vsyscall-sysenter.S	2004-10-19 01:55:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/ia32/vsyscall-sysenter.S	2011-06-15 19:26:19.000000000 +0400
@@ -19,9 +19,9 @@ __kernel_vsyscall:
 .Lenter_kernel:
 	movl	%esp,%ebp
 	sysenter
-	.space 7,0x90
+	.space 23,0x90
 	jmp	.Lenter_kernel
-	/* 16: System call normal return point is here! */
+	/* 32: System call normal return point is here! */
 	pop	%ebp
 .Lpop_ebp:
 	pop	%edx
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/ia32/vsyscall.lds linux-2.6.9-ve023stab054/arch/x86_64/ia32/vsyscall.lds
--- linux-2.6.9-100.orig/arch/x86_64/ia32/vsyscall.lds	2004-10-19 01:53:25.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/ia32/vsyscall.lds	2011-06-15 19:26:19.000000000 +0400
@@ -4,11 +4,11 @@
  */
 
 /* This must match <asm/fixmap.h>.  */
-VSYSCALL_BASE = 0xffffe000;
+__VSYSCALL_BASE = 0xbffff000;
 
 SECTIONS
 {
-  . = VSYSCALL_BASE + SIZEOF_HEADERS;
+  . = __VSYSCALL_BASE + SIZEOF_HEADERS;
 
   .hash           : { *(.hash) }		:text
   .dynsym         : { *(.dynsym) }
@@ -21,18 +21,18 @@ SECTIONS
      For the layouts to match, we need to skip more than enough
      space for the dynamic symbol table et al.  If this amount
      is insufficient, ld -shared will barf.  Just increase it here.  */
-  . = VSYSCALL_BASE + 0x400;
+  . = __VSYSCALL_BASE + 0x400;
   
   .text.vsyscall   : { *(.text.vsyscall) } 	:text =0x90909090
 
   /* This is an 32bit object and we cannot easily get the offsets
      into the 64bit kernel. Just hardcode them here. This assumes
      that all the stubs don't need more than 0x100 bytes. */
-  . = VSYSCALL_BASE + 0x500;
+  . = __VSYSCALL_BASE + 0x500;
 
   .text.sigreturn  : { *(.text.sigreturn) }	:text =0x90909090
 
-  . = VSYSCALL_BASE + 0x600;
+  . = __VSYSCALL_BASE + 0x600;
 
   .text.rtsigreturn : { *(.text.rtsigreturn) }   :text =0x90909090
 	
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/acpi/wakeup.S linux-2.6.9-ve023stab054/arch/x86_64/kernel/acpi/wakeup.S
--- linux-2.6.9-100.orig/arch/x86_64/kernel/acpi/wakeup.S	2004-10-19 01:54:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/acpi/wakeup.S	2011-06-15 19:26:20.000000000 +0400
@@ -77,7 +77,7 @@ wakeup_code:
 
 	.byte 0x66, 0xea			# prefix + jmpi-opcode
 	.long	wakeup_32 - __START_KERNEL_map
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 	.code32
 wakeup_32:
@@ -96,13 +96,13 @@ wakeup_32:
 	jnc	bogus_cpu
 	movl	%edx,%edi
 	
-	movw	$__KERNEL_DS, %ax
+	movw	$__BOOT_DS, %ax
 	movw	%ax, %ds
 	movw	%ax, %es
 	movw	%ax, %fs
 	movw	%ax, %gs
 
-	movw	$__KERNEL_DS, %ax	
+	movw	$__BOOT_DS, %ax	
 	movw	%ax, %ss
 
 	mov	$(wakeup_stack - __START_KERNEL_map), %esp
@@ -187,7 +187,7 @@ reach_compatibility_mode:
 
 wakeup_jumpvector:
 	.long	wakeup_long64 - __START_KERNEL_map
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 .code64
 
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/apic.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/apic.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/apic.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/apic.c	2011-06-15 19:26:19.000000000 +0400
@@ -900,6 +900,7 @@ void smp_local_timer_interrupt(struct pt
  */
 void smp_apic_timer_interrupt(struct pt_regs *regs)
 {
+	struct ve_struct *ve;
 	/*
 	 * the NMI deadlock-detector uses this.
 	 */
@@ -915,9 +916,11 @@ void smp_apic_timer_interrupt(struct pt_
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
+	ve = set_exec_env(get_ve0());
 	irq_enter();
 	smp_local_timer_interrupt(regs);
 	irq_exit();
+	(void)set_exec_env(ve);
 }
 
 /*
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/asm-offsets.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/asm-offsets.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/asm-offsets.c	2011-06-09 19:23:05.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/asm-offsets.c	2011-06-15 19:26:19.000000000 +0400
@@ -32,6 +32,7 @@ int main(void)
 	ENTRY(flags);
 	ENTRY(addr_limit);
 	ENTRY(preempt_count);
+	ENTRY(sysenter_return);
 	ENTRY(status);
 	BLANK();
 #undef ENTRY
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/entry.S linux-2.6.9-ve023stab054/arch/x86_64/kernel/entry.S
--- linux-2.6.9-100.orig/arch/x86_64/kernel/entry.S	2011-06-09 19:23:05.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/entry.S	2011-06-15 19:26:19.000000000 +0400
@@ -173,6 +173,10 @@ rff_trace:
  *
  * XXX	if we had a free scratch register we could save the RSP into the stack frame
  *      and report it properly in ps. Unfortunately we haven't.
+ *
+ * When user can change the frames always force IRET. That is because
+ * it deals with uncanonical addresses better. SYSRET has trouble
+ * with them due to bugs in both AMD and Intel CPUs.
  */ 			 		
 
 ENTRY(system_call)
@@ -236,7 +240,10 @@ sysret_signal:
 	xorl %esi,%esi # oldset -> arg2
 	call ptregscall_common
 1:	movl $_TIF_NEED_RESCHED,%edi
-	jmp sysret_check
+	/* Use IRET because user could have changed frame. This
+	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
+	cli
+	jmp int_with_check
 	
 	/* Do syscall tracing */
 tracesys:			 
@@ -257,7 +264,8 @@ tracesys:			 
 	call syscall_trace_leave
 	RESTORE_TOP_OF_STACK %rbx
 	RESTORE_REST
-	jmp ret_from_sys_call
+	/* Use IRET because user could have changed frame */
+	jmp int_ret_from_sys_call
 		
 badsys:
 	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)	
@@ -361,20 +369,9 @@ ENTRY(stub_execve)
 	popq %r11
 	CFI_ADJUST_CFA_OFFSET	-8
 	SAVE_REST
-	movq %r11, %r15
 	FIXUP_TOP_OF_STACK %r11
 	call sys_execve
-	GET_THREAD_INFO(%rcx)
-	bt $TIF_IA32,threadinfo_flags(%rcx)
-	jc exec_32bit
 	RESTORE_TOP_OF_STACK %r11
-	movq %r15, %r11
-	RESTORE_REST
-	push %r11
-	ret
-
-exec_32bit:
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
 	movq %rax,RAX(%rsp)
 	RESTORE_REST
 	jmp int_ret_from_sys_call
@@ -477,7 +474,7 @@ iret_label:	
 	/* force a signal here? this matches i386 behaviour */
 	/* running with kernel gs */
 bad_iret:
-	movq $-9999,%rdi	/* better code? */
+	movq $11,%rdi		/* SIGSEGV */
 	sti
 	jmp do_exit			
 	.previous	
@@ -736,7 +733,7 @@ ENTRY(kernel_thread)
 	xorl %r9d,%r9d
 	
 	# clone now
-	call do_fork
+	call do_fork_kthread
 	movq %rax,RAX(%rsp)
 	xorl %edi,%edi
 
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/head.S linux-2.6.9-ve023stab054/arch/x86_64/kernel/head.S
--- linux-2.6.9-100.orig/arch/x86_64/kernel/head.S	2004-10-19 01:54:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/head.S	2011-06-15 19:26:20.000000000 +0400
@@ -39,7 +39,7 @@ startup_32:
 
 	movl %ebx,%ebp	/* Save trampoline flag */
 	
-	movl $__KERNEL_DS,%eax
+	movl $__BOOT_DS,%eax
 	movl %eax,%ds
 	
 	/* If the CPU doesn't support CPUID this will double fault.
@@ -159,7 +159,14 @@ reach_long64:
 	/* esi is pointer to real mode structure with interesting info.
 	   pass it to C */
 	movl	%esi, %edi
-	
+
+	/* Switch to __KERNEL_CS. The segment is the same, but selector
+	 * is different. */
+	pushq	$__KERNEL_CS
+	pushq	$switch_cs
+	lretq
+switch_cs:
+
 	/* Finally jump to run C code and to be on real kernel address
 	 * Since we are running on identity-mapped space we have to jump
 	 * to the full 64bit address , this is only possible as indirect
@@ -192,7 +199,7 @@ pGDT32:
 .org 0xf10	
 ljumpvector:
 	.long	reach_long64-__START_KERNEL_map
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 ENTRY(stext)
 ENTRY(_stext)
@@ -326,7 +333,7 @@ gdt:
 ENTRY(gdt_table32)
 	.quad	0x0000000000000000	/* This one is magic */
 	.quad	0x0000000000000000	/* unused */
-	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
+	.quad	0x00af9a000000ffff	/* __BOOT_CS */
 gdt32_end:	
 	
 /* We need valid kernel segments for data and code in long mode too
@@ -337,23 +344,30 @@ gdt32_end:	
 .align L1_CACHE_BYTES
 
 /* The TLS descriptors are currently at a different place compared to i386.
-   Hopefully nobody expects them at a fixed place (Wine?) */
+   Hopefully nobody expects them at a fixed place (Wine?)
+   Descriptors rearranged to place 32bit and TLS selectors in the same
+   places, because it is really necessary. sysret/exit mandates order
+   of kernel/user cs/ds, so we have to extend gdt.
+*/
 	
 ENTRY(cpu_gdt_table)
-	.quad	0x0000000000000000	/* NULL descriptor */
-	.quad	0x008f9a000000ffff	/* __KERNEL_COMPAT32_CS */	
-	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
-	.quad	0x00cffe000000ffff	/* __USER32_CS */
-	.quad	0x00cff2000000ffff	/* __USER_DS, __USER32_DS  */		
-	.quad	0x00affa000000ffff	/* __USER_CS */
-	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
-	.quad	0,0			/* TSS */
-	.quad	0			/* LDT */
-	.quad   0,0,0			/* three TLS descriptors */ 
-	.quad	0			/* unused now */
-	.quad	0x00009a000000ffff	/* __KERNEL16_CS - 16bit PM for S3 wakeup. */
+	.quad	0x0000000000000000	/* 0 NULL descriptor */
+	.quad	0x008f9a000000ffff	/* 1 __KERNEL_COMPAT32_CS */	
+	.quad	0x00af9a000000ffff	/* 2 __BOOT_CS */
+	.quad	0x00cf92000000ffff	/* 3 __BOOT_DS */
+	.quad	0,0			/* 4,5 TSS */
+	.quad   0,0,0			/* 6-8 three TLS descriptors */ 
+	.quad	0x00009a000000ffff	/* 9 __KERNEL16_CS - 16bit PM for S3 wakeup. */
 					/* base must be patched for real base address. */
+	.quad	0			/* 10 LDT */
+	.quad	0x00cf9a000000ffff	/* 11 __KERNEL32_CS */
+	.quad	0x00af9a000000ffff	/* 12 __KERNEL_CS */
+	.quad	0x00cf92000000ffff	/* 13 __KERNEL_DS */
+	.quad	0x00cffe000000ffff	/* 14 __USER32_CS */
+	.quad	0x00cff2000000ffff	/* 15 __USER_DS, __USER32_DS  */		
+	.quad	0x00affa000000ffff	/* 16 __USER_CS */
+	.quad	0,0,0,0,0,0,0
+	.quad	0,0,0,0,0,0,0,0
 gdt_end:	
 	/* asm/segment.h:GDT_ENTRIES must match this */	
 	/* This should be a multiple of the cache line size */
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/irq.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/irq.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/irq.c	2011-06-09 19:22:45.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/irq.c	2011-06-15 19:26:19.000000000 +0400
@@ -45,7 +45,8 @@
 #include <asm/desc.h>
 #include <asm/irq.h>
 
-
+#include <ub/beancounter.h>
+#include <ub/ub_task.h>
 
 /*
  * Linux has a controller-independent x86 interrupt architecture.
@@ -214,16 +215,19 @@ int handle_IRQ_event(unsigned int irq, s
 {
 	int status = 1; /* Force the "do bottom halves" bit */
 	int ret;
+	struct user_beancounter *ub;
 
 	if (!(action->flags & SA_INTERRUPT))
 		local_irq_enable();
 
+	ub = set_exec_ub(get_ub0());
 	do {
 		ret = action->handler(irq, action->dev_id, regs);
 		if (ret == IRQ_HANDLED)
 			status |= action->flags;
 		action = action->next;
 	} while (action);
+	(void)set_exec_ub(ub);
 	if (status & SA_SAMPLE_RANDOM)
 		add_interrupt_randomness(irq);
 	local_irq_disable();
@@ -342,9 +346,11 @@ asmlinkage unsigned int do_IRQ(struct pt
 	irq_desc_t *desc = irq_desc + irq;
 	struct irqaction * action;
 	unsigned int status;
+	struct ve_struct *ve;
 
 	if (irq > 256) BUG();
 
+	ve = set_exec_env(get_ve0());
 	irq_enter(); 
 	kstat_cpu(cpu).irqs[irq]++;
 	spin_lock(&desc->lock);
@@ -407,6 +413,7 @@ out:
 	spin_unlock(&desc->lock);
 
 	irq_exit();
+	(void)set_exec_env(ve);
 	return 1;
 }
 
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/ldt.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/ldt.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/ldt.c	2004-10-19 01:55:18.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/ldt.c	2011-06-15 19:26:21.000000000 +0400
@@ -22,6 +22,7 @@
 #include <asm/ldt.h>
 #include <asm/desc.h>
 #include <asm/proto.h>
+#include <linux/module.h>
 
 #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
 static void flush_ldt(void *null)
@@ -109,6 +110,7 @@ int init_new_context(struct task_struct 
 	}
 	return retval;
 }
+EXPORT_SYMBOL(init_new_context);
 
 /*
  * 
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/nmi.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/nmi.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/nmi.c	2011-06-09 19:23:04.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/nmi.c	2011-06-15 19:26:22.000000000 +0400
@@ -380,6 +380,29 @@ void touch_nmi_watchdog (void)
 	}
 }
 
+static spinlock_t show_regs_lock = SPIN_LOCK_UNLOCKED;
+
+void smp_show_regs(struct pt_regs *regs, void *data)
+{
+	if (regs == NULL)
+		return;
+
+	spin_lock(&show_regs_lock);
+	bust_spinlocks(1);
+	printk("----------- IPI show regs -----------\n");
+	show_regs(regs);
+	bust_spinlocks(0);
+	spin_unlock(&show_regs_lock);
+}
+
+static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
+{
+	return 0;
+}
+
+static nmi_callback_t nmi_callback = dummy_nmi_callback;
+static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback;
+
 int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
 {
 	int sum, cpu, touched = 0, rc = 0;
@@ -428,16 +451,11 @@ int nmi_watchdog_tick (struct pt_regs * 
 		rc = 1;
 		break;
 	}
+	if (!rc && nmi_ipi_callback != dummy_nmi_callback)
+		rc = 1;
 	return rc;
 }
 
-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
-{
-	return 0;
-}
- 
-static nmi_callback_t nmi_callback = dummy_nmi_callback;
- 
 asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
 {
 	int cpu = safe_smp_processor_id();
@@ -446,9 +464,21 @@ asmlinkage void do_nmi(struct pt_regs * 
 	add_pda(__nmi_count,1);
 	if (!nmi_callback(regs, cpu))
 		default_do_nmi(regs);
+
+	nmi_ipi_callback(regs, cpu);
 	nmi_exit();
 }
 
+void set_nmi_ipi_callback(nmi_callback_t callback)
+{
+	nmi_ipi_callback = callback;
+}
+
+void unset_nmi_ipi_callback(void)
+{
+	nmi_ipi_callback = dummy_nmi_callback;
+}
+
 void set_nmi_callback(nmi_callback_t callback)
 {
 	nmi_callback = callback;
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/process.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/process.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/process.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/process.c	2011-06-15 19:26:22.000000000 +0400
@@ -33,6 +33,7 @@
 #include <linux/irq.h>
 #include <linux/ptrace.h>
 #include <linux/version.h>
+#include <linux/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -49,6 +50,13 @@
 #include <asm/ia32.h>
 
 asmlinkage extern void ret_from_fork(void);
+EXPORT_SYMBOL(ret_from_fork);
+
+asmlinkage extern void int_ret_from_sys_call(void);
+EXPORT_SYMBOL(int_ret_from_sys_call);
+
+asmlinkage extern void execve(void);
+EXPORT_SYMBOL(execve);
 
 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
 
@@ -214,8 +222,9 @@ void __show_regs(struct pt_regs * regs)
 	printk("Pid: %d, comm: %.20s %s %s\n", 
 	       current->pid, current->comm, print_tainted(), UTS_RELEASE);
 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
-	printk_address(regs->rip); 
-	printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
+	if (decode_call_traces)
+		printk_address(regs->rip); 
+	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       regs->rax, regs->rbx, regs->rcx);
 	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
@@ -250,6 +259,8 @@ void __show_regs(struct pt_regs * regs)
 
 void show_regs(struct pt_regs *regs)
 {
+	printk("CPU: %d, VCPU: %d:%d ", safe_smp_processor_id(),
+			task_vsched_id(current), task_cpu(current));
 	__show_regs(regs);
 	show_trace(&regs->rsp);
 }
@@ -623,7 +634,7 @@ long do_arch_prctl(struct task_struct *t
 
 	switch (code) { 
 	case ARCH_SET_GS:
-		if (addr >= TASK_SIZE) 
+		if (addr >= TASK_SIZE_OF(task)) 
 			return -EPERM; 
 		cpu = get_cpu();
 		/* handle small bases via the GDT because that's faster to 
@@ -649,7 +660,7 @@ long do_arch_prctl(struct task_struct *t
 	case ARCH_SET_FS:
 		/* Not strictly needed for fs, but do it for symmetry
 		   with gs */
-		if (addr >= TASK_SIZE)
+		if (addr >= TASK_SIZE_OF(task))
 			return -EPERM; 
 		cpu = get_cpu();
 		/* handle small bases via the GDT because that's faster to 
@@ -728,3 +739,20 @@ int dump_task_regs(struct task_struct *t
  
 	return 1;
 }
+
+long do_fork_kthread(unsigned long clone_flags,
+	      unsigned long stack_start,
+	      struct pt_regs *regs,
+	      unsigned long stack_size,
+	      int __user *parent_tidptr,
+	      int __user *child_tidptr)
+{
+	if (ve_allow_kthreads || ve_is_super(get_exec_env()))
+		return do_fork(clone_flags, stack_start, regs, stack_size,
+				parent_tidptr, child_tidptr);
+
+	/* Don't allow kernel_thread() inside VE */
+	printk("kernel_thread call inside VE\n");
+	dump_stack();
+	return -EPERM;
+}
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/ptrace.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/ptrace.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/ptrace.c	2011-06-09 19:23:05.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/ptrace.c	2011-06-15 19:26:22.000000000 +0400
@@ -125,12 +125,12 @@ static int putreg(struct task_struct *ch
 			value &= 0xffff;
 			return 0;
 		case offsetof(struct user_regs_struct,fs_base):
-			if (value >= TASK_SIZE)
+			if (value >= TASK_SIZE_OF(child))
 				return -EIO; 
 			child->thread.fs = value;
 			return 0;
 		case offsetof(struct user_regs_struct,gs_base):
-			if (value >= TASK_SIZE)
+			if (value >= TASK_SIZE_OF(child))
 				return -EIO; 
 			child->thread.gs = value;
 			return 0;
@@ -164,7 +164,7 @@ static int putreg(struct task_struct *ch
 			 * user stack.  So just disallow directly setting
 			 * any value in danger of being noncanonical.
 			 */
-			if (value >= TASK_SIZE)
+			if (value >= TASK_SIZE_OF(child))
 				return -EIO;
 			break;
 	}
@@ -188,6 +188,15 @@ static unsigned long getreg(struct task_
 			return child->thread.fs;
 		case offsetof(struct user_regs_struct, gs_base):
 			return child->thread.gs;
+		case offsetof(struct user_regs_struct, cs):
+			if (test_tsk_thread_flag(child, TIF_SYSCALL_TRACE)) {
+				val = get_stack_long(child, regno - sizeof(struct pt_regs));
+				if (val == __USER_CS)
+					return 0x33;
+				if (val == __USER32_CS)
+					return 0x23;
+			}
+			/* fall through */
 		default:
 			regno = regno - sizeof(struct pt_regs);
 			val = get_stack_long(child, regno);
@@ -208,20 +217,23 @@ asmlinkage long sys_ptrace(long request,
 	lock_kernel();
 	ret = -EPERM;
 	if (request == PTRACE_TRACEME) {
+		task_lock(current);
 		/* are we already being traced? */
 		if (current->ptrace & PT_PTRACED)
-			goto out;
+			goto out_task_unlock;
 		ret = security_ptrace(current->parent, current);
 		if (ret)
-			goto out;
+			goto out_task_unlock;
 		/* set the ptrace bit in the process flags. */
 		current->ptrace |= PT_PTRACED;
 		ret = 0;
+out_task_unlock:
+		task_unlock(current);
 		goto out;
 	}
 	ret = -ESRCH;
 	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
+	child = find_task_by_pid_ve(pid);
 	if (child)
 		get_task_struct(child);
 	read_unlock(&tasklist_lock);
@@ -304,6 +316,10 @@ asmlinkage long sys_ptrace(long request,
 		break;
 
 	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
+	{
+		int dsize;
+
+		dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7;
 		ret = -EIO;
 		if ((addr & 7) ||
 		    addr > sizeof(struct user) - 7)
@@ -315,22 +331,22 @@ asmlinkage long sys_ptrace(long request,
 			break;
 		/* Disallows to set a breakpoint into the vsyscall */
 		case offsetof(struct user, u_debugreg[0]):
-			if (data >= TASK_SIZE-7) break;
+			if (data >= TASK_SIZE_OF(child) - dsize) break;
 			child->thread.debugreg0 = data;
 			ret = 0;
 			break;
 		case offsetof(struct user, u_debugreg[1]):
-			if (data >= TASK_SIZE-7) break;
+			if (data >= TASK_SIZE_OF(child) - dsize) break;
 			child->thread.debugreg1 = data;
 			ret = 0;
 			break;
 		case offsetof(struct user, u_debugreg[2]):
-			if (data >= TASK_SIZE-7) break;
+			if (data >= TASK_SIZE_OF(child) - dsize) break;
 			child->thread.debugreg2 = data;
 			ret = 0;
 			break;
 		case offsetof(struct user, u_debugreg[3]):
-			if (data >= TASK_SIZE-7) break;
+			if (data >= TASK_SIZE_OF(child) - dsize) break;
 			child->thread.debugreg3 = data;
 			ret = 0;
 			break;
@@ -352,6 +368,7 @@ asmlinkage long sys_ptrace(long request,
 		  break;
 		}
 		break;
+	}
 	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
 	case PTRACE_CONT: { /* restart after signal. */
 		long tmp;
@@ -522,8 +539,10 @@ static void syscall_trace(struct pt_regs
 	       current_thread_info()->flags, current->ptrace); 
 #endif
 
+	set_pn_state(current, (regs->rax != -ENOSYS) ? PN_STOP_LEAVE : PN_STOP_ENTRY);
 	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
 				? 0x80 : 0));
+	clear_pn_state(current);
 	/*
 	 * this isn't the same as continuing with a signal, but it will do
 	 * for normal use.  strace only continues with a signal if the
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/setup.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/setup.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/setup.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/setup.c	2011-06-15 19:26:22.000000000 +0400
@@ -42,6 +42,7 @@
 #include <linux/edd.h>
 #include <linux/cpufreq.h>
 #include <linux/dmi.h>
+#include <linux/vsched.h>
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -1268,12 +1269,18 @@ static int show_cpuinfo(struct seq_file 
  		NULL,
  		/* nothing */	/* constant_tsc - moved to flags */
 	};
+	unsigned long vcpu_khz;
 
 
 #ifdef CONFIG_SMP
-	if (!cpu_online(c-cpu_data))
+	if (!vcpu_online(c-cpu_data))
 		return 0;
 #endif
+#ifdef CONFIG_FAIRSCHED
+	vcpu_khz = ve_scale_khz(cpu_khz);
+#else
+	vcpu_khz = cpu_khz;
+#endif
 
 	seq_printf(m,"processor\t: %u\n"
 		     "vendor_id\t: %s\n"
@@ -1294,7 +1301,7 @@ static int show_cpuinfo(struct seq_file 
 	if (cpu_has(c,X86_FEATURE_TSC)) {
 		unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
 		if (!freq)
-			freq = cpu_khz;
+			freq = vcpu_khz;
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
 			     freq / 1000, (freq % 1000));
 	}
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/signal.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/signal.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/signal.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/signal.c	2011-06-15 19:26:21.000000000 +0400
@@ -31,6 +31,7 @@
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
 #include <asm/vsyscall.h>
+#include <linux/module.h>
 
 /* #define DEBUG_SIG 1 */
 
@@ -56,6 +57,7 @@ sys_rt_sigsuspend(sigset_t __user *unews
 
 	spin_lock_irq(&current->sighand->siglock);
 	saveset = current->blocked;
+	set_sigsuspend_state(current, saveset);
 	current->blocked = newset;
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
@@ -67,8 +69,10 @@ sys_rt_sigsuspend(sigset_t __user *unews
 	while (1) {
 		current->state = TASK_INTERRUPTIBLE;
 		schedule();
-		if (do_signal(regs, &saveset))
+		if (do_signal(regs, &saveset)) {
+			clear_sigsuspend_state(current);
 			return -EINTR;
+		}
 	}
 }
 
@@ -461,9 +465,10 @@ int do_signal(struct pt_regs *regs, sigs
 		return 1;
 	} 	
 
-	if (current->flags & PF_FREEZE) {
-		refrigerator(0);
-		goto no_signal;
+	if (test_thread_flag(TIF_FREEZE)) {
+		refrigerator();
+		if (!signal_pending(current))
+			goto no_signal;
 	}
 
 	if (!oldset)
@@ -504,6 +509,7 @@ int do_signal(struct pt_regs *regs, sigs
 	}
 	return 0;
 }
+EXPORT_SYMBOL(do_signal);
 
 void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags)
 {
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/smp.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/smp.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/smp.c	2011-06-09 19:22:59.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/smp.c	2011-06-15 19:26:22.000000000 +0400
@@ -20,11 +20,13 @@
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 
 #include <asm/mtrr.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/mach_apic.h>
+#include <asm/nmi.h>
 
 /*
  *	Smarter SMP flushing macros. 
@@ -212,6 +214,7 @@ void flush_tlb_mm (struct mm_struct * mm
 
 	preempt_enable();
 }
+EXPORT_SYMBOL(flush_tlb_mm);
 
 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 {
@@ -280,7 +283,8 @@ struct call_data_struct {
 	int wait;
 };
 
-static struct call_data_struct * call_data;
+static struct call_data_struct call_fn_data;
+static struct call_data_struct * call_data = &call_fn_data;
 static struct call_data_struct * saved_call_data;
 
 /*
@@ -348,30 +352,28 @@ void dump_smp_call_function (void (*func
 static void __smp_call_function (void (*func) (void *info), void *info,
 				int nonatomic, int wait)
 {
-	struct call_data_struct data;
 	int cpus = num_online_cpus()-1;
 
 	if (!cpus)
 		return;
 
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
+	call_fn_data.func = func;
+	call_fn_data.info = info;
+	call_fn_data.wait = wait;
+	atomic_set(&call_fn_data.started, 0);
 	if (wait)
-		atomic_set(&data.finished, 0);
+		atomic_set(&call_fn_data.finished, 0);
 
-	call_data = &data;
 	wmb();
 	/* Send a message to all other CPUs and wait for them to respond */
 	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 
 	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
+	while (atomic_read(&call_fn_data.started) != cpus)
 		cpu_relax();
 
 	if (wait)
-		while (atomic_read(&data.finished) != cpus)
+		while (atomic_read(&call_fn_data.finished) != cpus)
 			cpu_relax();
 
 }
@@ -399,6 +401,83 @@ int smp_call_function (void (*func) (voi
 	return 0;
 }
 
+static spinlock_t nmi_call_lock = SPIN_LOCK_UNLOCKED;
+static struct nmi_call_data_struct {
+	smp_nmi_function func;
+	void *info;
+	atomic_t started;
+	atomic_t finished;
+	cpumask_t cpus_called;
+	int wait;
+} *nmi_call_data;
+
+static int smp_nmi_callback(struct pt_regs * regs, int cpu)
+{
+	smp_nmi_function func;
+	void *info;
+	int wait;
+
+	func = nmi_call_data->func;
+	info = nmi_call_data->info;
+	wait = nmi_call_data->wait;
+	/* prevent from calling func() multiple times */
+	if (cpu_test_and_set(cpu, nmi_call_data->cpus_called))
+		return 0;
+	/*
+	 * notify initiating CPU that I've grabbed the data and am
+	 * about to execute the function
+	 */
+	mb();
+	atomic_inc(&nmi_call_data->started);
+	/* at this point the nmi_call_data structure is out of scope */
+	irq_enter();
+	func(regs, info);
+	irq_exit();
+	if (wait)
+		atomic_inc(&nmi_call_data->finished);
+
+	return 0;
+}
+
+int smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
+{
+	struct nmi_call_data_struct data;
+	int cpus;
+
+	cpus = num_online_cpus() - 1;
+	if (!cpus)
+		return 0;
+
+	data.func = func;
+	data.info = info;
+	data.wait = wait;
+	atomic_set(&data.started, 0);
+	atomic_set(&data.finished, 0);
+	cpus_clear(data.cpus_called);
+	/* prevent this cpu from calling func if NMI happens */
+	cpu_set(smp_processor_id(), data.cpus_called);
+
+	if (!spin_trylock(&nmi_call_lock))
+		return -1;
+
+	nmi_call_data = &data;
+	set_nmi_ipi_callback(smp_nmi_callback);
+	mb();
+
+	/* Send a message to all other CPUs and wait for them to respond */
+	send_IPI_allbutself(APIC_DM_NMI);
+	while (atomic_read(&data.started) != cpus)
+		barrier();
+
+	unset_nmi_ipi_callback();
+	if (wait)
+		while (atomic_read(&data.finished) != cpus)
+			barrier();
+	spin_unlock(&nmi_call_lock);
+
+	return 0;
+}
+
 void smp_stop_cpu(void)
 {
 	/*
@@ -417,18 +496,18 @@ static void smp_really_stop_cpu(void *du
 		asm("hlt"); 
 } 
 
+static struct call_data_struct stop_cpus_data = {
+	.func = &smp_really_stop_cpu,
+	.info = NULL,
+	.wait = 0,
+};
+
 void smp_send_stop(void)
 {
-	int nolock = 0;
-	/* Don't deadlock on the call lock in panic */
-	if (!spin_trylock(&call_lock)) {
-		udelay(100);
-		/* ignore locking because we have paniced anyways */
-		nolock = 1;
-	}
-	__smp_call_function(smp_really_stop_cpu, NULL, 1, 0);
-	if (!nolock)
-		spin_unlock(&call_lock);
+	call_data = &stop_cpus_data;
+	mb();
+	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
 	local_irq_disable();
 	disable_local_APIC();
 	local_irq_enable(); 
@@ -446,11 +525,17 @@ asmlinkage void smp_reschedule_interrupt
 
 asmlinkage void smp_call_function_interrupt(void)
 {
-	void (*func) (void *info) = call_data->func;
-	void *info = call_data->info;
-	int wait = call_data->wait;
+	struct call_data_struct *data;
+	void (*func) (void *info);
+	void *info;
+	int wait;
 
 	ack_APIC_irq();
+
+	data = call_data;
+	func = data->func;
+	info = data->info;
+	wait = data->wait;
 	/*
 	 * Notify initiating CPU that I've grabbed the data and am
 	 * about to execute the function
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/smpboot.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/smpboot.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/smpboot.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/smpboot.c	2011-06-15 19:26:22.000000000 +0400
@@ -735,6 +735,7 @@ static void __init do_boot_cpu (int apic
 	if (boot_error) {
 		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
 		clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
+		fini_idle(cpu); /* undo fork_idle() */
 		cpucount--;
 		x86_cpu_to_apicid[cpu] = BAD_APICID;
 		x86_cpu_to_log_apicid[cpu] = BAD_APICID;
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/sys_x86_64.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/sys_x86_64.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/sys_x86_64.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/sys_x86_64.c	2011-06-15 19:26:19.000000000 +0400
@@ -54,13 +54,7 @@ out:
 void find_start_end(unsigned long flags, unsigned long *begin,
 			   unsigned long *end)
 {
-#ifdef CONFIG_IA32_EMULATION
-	if (test_thread_flag(TIF_IA32)) { 
-		*begin = TASK_UNMAPPED_32;
-		*end = IA32_PAGE_OFFSET; 
-	} else 
-#endif
-	if (flags & MAP_32BIT) { 
+	if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) { 
 		/* This is usually used needed to map code in small
 		   model, so it needs to be in the first 31bit. Limit
 		   it to that.  This means we need to move the
@@ -68,12 +62,12 @@ void find_start_end(unsigned long flags,
 		   conflicts with the heap, but we assume that glibc
 		   malloc knows how to fall back to mmap. Give it 1GB
 		   of playground for now. -AK */ 
-		*begin = 0x40000000; 
-		*end = 0x80000000;		
-	} else { 
-		*begin = TASK_UNMAPPED_64; 
-		*end = TASK_SIZE; 
-		}
+		*begin = 0x40000000;
+		*end = 0x80000000;
+	} else {
+		*begin = TASK_UNMAPPED_BASE;
+		*end = TASK_SIZE;
+	}
 } 
 
 unsigned long
@@ -226,7 +220,7 @@ asmlinkage long sys_uname(struct new_uts
 {
 	int err;
 	down_read(&uts_sem);
-	err = copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, &ve_utsname, sizeof (*name));
 	up_read(&uts_sem);
 	if (personality(current->personality) == PER_LINUX32) 
 		err |= copy_to_user(&name->machine, "i686", 5); 		
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/time.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/time.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/time.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/time.c	2011-06-15 19:26:19.000000000 +0400
@@ -72,6 +72,8 @@ unsigned long vxtime_hz = PIT_TICK_RATE;
 int report_lost_ticks;				/* command line option */
 unsigned long long monotonic_base;
 
+EXPORT_SYMBOL(cpu_khz);
+
 struct vxtime_data __vxtime __section_vxtime;	/* for vsyscalls */
 
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
@@ -1046,6 +1048,7 @@ void __init time_init_gtod(void)
 	if (unsynchronized_tsc())
 		notsc = 1;
 
+	sysctl_vsyscall = 1;
 	if (vxtime.hpet_address && notsc) {
 		timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
 		vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick_real;
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/trampoline.S linux-2.6.9-ve023stab054/arch/x86_64/kernel/trampoline.S
--- linux-2.6.9-100.orig/arch/x86_64/kernel/trampoline.S	2004-10-19 01:55:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/trampoline.S	2011-06-15 19:26:21.000000000 +0400
@@ -46,7 +46,7 @@ r_base = .
 	lidt	idt_48 - r_base	# load idt with 0, 0
 	lgdt	gdt_48 - r_base	# load gdt with whatever is appropriate
 
-	movw    $__KERNEL_DS,%ax
+	movw    $__BOOT_DS,%ax
 	movw    %ax,%ds
 	movw    %ax,%es
 	
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/traps.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/traps.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/traps.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/traps.c	2011-06-15 19:26:22.000000000 +0400
@@ -95,7 +95,7 @@ static int kstack_depth_to_print = 10;
 
 #ifdef CONFIG_KALLSYMS
 #include <linux/kallsyms.h> 
-int printk_address(unsigned long address)
+void printk_address(unsigned long address)
 { 
 	unsigned long offset = 0, symsize;
 	const char *symname;
@@ -103,19 +103,24 @@ int printk_address(unsigned long address
 	char *delim = ":"; 
 	char namebuf[128];
 
+	if (!decode_call_traces)
+		return printk(" [<%016lx>]", address);
+
 	symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); 
-	if (!symname) 
-		return printk("[<%016lx>]", address);
+	if (!symname) {
+		printk(" [<%016lx>]\n", address);
+		return;
+	}
 	if (!modname) 
 		modname = delim = ""; 		
-        return printk("<%016lx>{%s%s%s%s%+ld}",
-		      address,delim,modname,delim,symname,offset); 
+	printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n",
+		address, delim, modname, delim, symname, offset, symsize);
 } 
 #else
-int printk_address(unsigned long address)
-{ 
-	return printk("[<%016lx>]", address);
-} 
+void printk_address(unsigned long address)
+{
+	printk(" [<%016lx>]\n", address);
+}
 #endif
 
 unsigned long *in_exception_stack(int cpu, unsigned long stack) 
@@ -144,26 +149,17 @@ void show_trace(unsigned long *stack)
 	unsigned long addr;
 	unsigned long *irqstack, *irqstack_end, *estack_end;
 	const int cpu = safe_smp_processor_id();
-	int i;
 
 	printk("\nCall Trace:");
-	i = 0; 
 	
 	estack_end = in_exception_stack(cpu, (unsigned long)stack); 
 	if (estack_end) { 
 		while (stack < estack_end) { 
 			addr = *stack++; 
 			if (__kernel_text_address(addr)) {
-				i += printk_address(addr);
-				i += printk(" "); 
-				if (i > 50) {
-					printk("\n"); 
-					i = 0;
-				}
+				printk_address(addr);
 			}
 		}
-		i += printk(" <EOE> "); 
-		i += 7;
 		stack = (unsigned long *) estack_end[-2]; 
 	}  
 
@@ -171,7 +167,7 @@ void show_trace(unsigned long *stack)
 	irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE + 64);
 
 	if (stack >= irqstack && stack < irqstack_end) {
-		printk("<IRQ> ");  
+		printk(" <IRQ> ");  
 		while (stack < irqstack_end) {
 			addr = *stack++;
 			/*
@@ -183,28 +179,17 @@ void show_trace(unsigned long *stack)
 			 * out the call path that was taken.
 			 */
 			 if (__kernel_text_address(addr)) {
-				 i += printk_address(addr);
-				 i += printk(" "); 
-				 if (i > 50) { 
-					printk("\n       ");
-					 i = 0;
-				 } 
+				 printk_address(addr);
 			}
 		} 
 		stack = (unsigned long *) (irqstack_end[-1]);
-		printk(" <EOI> ");
-		i += 7;
+		printk(" <EOI> ");  
 	} 
 
 	while (((long) stack & (THREAD_SIZE-1)) != 0) {
 		addr = *stack++;
 		if (__kernel_text_address(addr)) {
-			i += printk_address(addr);
-			i += printk(" "); 
-			if (i > 50) { 
-				printk("\n       ");
-					 i = 0;
-			} 
+			printk_address(addr);
 		}
 	}
 	printk("\n");
@@ -240,12 +225,14 @@ void show_stack(struct task_struct *tsk,
 			break;
 		}
 		if (i && ((i % 4) == 0))
-			printk("\n       ");
-		printk("%016lx ", *stack++);
+			printk("\n");
+		printk(" %016lx", *stack++);
 	}
 	show_trace((unsigned long *)rsp);
 }
 
+EXPORT_SYMBOL(show_stack);
+
 /*
  * The architecture-independent dump_stack generator
  */
@@ -267,10 +254,13 @@ void show_registers(struct pt_regs *regs
 
 		rsp = regs->rsp;
 
-	printk("CPU %d ", cpu);
+	printk("CPU: %d, VCPU: %d:%d ", cpu, task_vsched_id(current), 
+			task_cpu(current));
 	__show_regs(regs);
-	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
-		cur->comm, cur->pid, cur->thread_info, cur);
+	printk("Process %s (pid: %d, veid=%d, threadinfo %p, task %p)\n",
+		cur->comm, cur->pid,
+		VEID(VE_TASK_INFO(current)->owner_env),
+		cur->thread_info, cur);
 
 	/*
 	 * When in-kernel, we also print out the stack and code at the
@@ -403,8 +393,12 @@ void die_nmi(char *str, struct pt_regs *
 	show_registers(regs);
 	if (panic_on_timeout || panic_on_oops)
 		panic("nmi watchdog");
-	printk("console shuts up ...\n");
+	smp_nmi_call_function(smp_show_regs, NULL, 1);
+	bust_spinlocks(1);
+	printk("NMI occurs. Console shuts up.");
+	console_loglevel = 0;
 	oops_end();
+	nmi_exit();
 	do_exit(SIGSEGV);
 }
 
@@ -504,6 +498,16 @@ DO_ERROR(18, SIGSEGV, "reserved", reserv
 asmlinkage void *do_##name(struct pt_regs * regs, long error_code) \
 { \
 	struct pt_regs *pr = ((struct pt_regs *)(current->thread.rsp0))-1; \
+	if (trapnr == 8) {						\
+	int i;								\
+	printk("thread info page:");					\
+	for (i = 0; i < THREAD_SIZE/sizeof(unsigned long); i++) {	\
+		if ((i & 3) == 0)					\
+			printk("\n%016lx: ", (unsigned long *)current->thread_info + i); \
+		printk("%016lx ", *((unsigned long *)current->thread_info + i));\
+	}	\
+	show_trace((unsigned long *)current->thread_info + 1);		\
+	}								\
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
 							== NOTIFY_STOP) \
 		return regs; \
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/vmlinux.lds.S linux-2.6.9-ve023stab054/arch/x86_64/kernel/vmlinux.lds.S
--- linux-2.6.9-100.orig/arch/x86_64/kernel/vmlinux.lds.S	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/vmlinux.lds.S	2011-06-15 19:26:18.000000000 +0400
@@ -45,32 +45,31 @@ SECTIONS
 	}
   __bss_end = .;
 
-  . = ALIGN(64);
+  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
   .data.cacheline_aligned : { *(.data.cacheline_aligned) }
 
+#define AFTER(x)      BINALIGN(LOADADDR(x) + SIZEOF(x), 16)
+#define BINALIGN(x,y) (((x) + (y) - 1)  & ~((y) - 1))
+#define CACHE_ALIGN(x) BINALIGN(x, CONFIG_X86_L1_CACHE_BYTES)
+
   .vsyscall_0 -10*1024*1024: AT ((LOADADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095)) { *(.vsyscall_0) }
   __vsyscall_0 = LOADADDR(.vsyscall_0);
-  . = ALIGN(64);
-  .xtime_lock : AT ((LOADADDR(.vsyscall_0) + SIZEOF(.vsyscall_0) + 63) & ~(63)) { *(.xtime_lock) }
+  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+  .xtime_lock : AT CACHE_ALIGN(AFTER(.vsyscall_0)) { *(.xtime_lock) }
   xtime_lock = LOADADDR(.xtime_lock);
-  . = ALIGN(16);
-  .vxtime : AT ((LOADADDR(.xtime_lock) + SIZEOF(.xtime_lock) + 15) & ~(15)) { *(.vxtime) }
+  .vxtime : AT AFTER(.xtime_lock) { *(.vxtime) }
   vxtime = LOADADDR(.vxtime);
-  . = ALIGN(16);
-  .wall_jiffies : AT ((LOADADDR(.vxtime) + SIZEOF(.vxtime) + 15) & ~(15)) { *(.wall_jiffies) }
+  .wall_jiffies : AT AFTER(.vxtime) { *(.wall_jiffies) }
   wall_jiffies = LOADADDR(.wall_jiffies);
-  . = ALIGN(16);
-  .sys_tz : AT ((LOADADDR(.wall_jiffies) + SIZEOF(.wall_jiffies) + 15) & ~(15)) { *(.sys_tz) }
+  .sys_tz : AT AFTER(.wall_jiffies) { *(.sys_tz) }
   sys_tz = LOADADDR(.sys_tz);
-  . = ALIGN(16);
-  .sysctl_vsyscall : AT ((LOADADDR(.sys_tz) + SIZEOF(.sys_tz) + 15) & ~(15)) { *(.sysctl_vsyscall) }
-  sysctl_vsyscall = LOADADDR(.sysctl_vsyscall);
-  . = ALIGN(16);
-  .jiffies : AT ((LOADADDR(.sysctl_vsyscall) + SIZEOF(.sysctl_vsyscall) + 15) & ~(15)) { *(.jiffies) }
-  jiffies = LOADADDR(.jiffies);
-  . = ALIGN(16);
-  .xtime : AT ((LOADADDR(.jiffies) + SIZEOF(.jiffies) + 15) & ~(15)) { *(.xtime) }
+  .sysctl_vsyscall : AT AFTER(.sys_tz) { *(.sysctl_vsyscall) }
+  sysctl_vsyscall = LOADADDR(.sysctl_vsyscall); 
+  .xtime : AT AFTER(.sysctl_vsyscall) { *(.xtime) }
   xtime = LOADADDR(.xtime);
+  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+  .jiffies : AT CACHE_ALIGN(AFTER(.xtime)) { *(.jiffies) }
+  jiffies = LOADADDR(.jiffies);
   .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT (LOADADDR(.vsyscall_0) + 1024) { *(.vsyscall_1) }
   . = LOADADDR(.vsyscall_0) + 4096;
 
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/vsyscall.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/vsyscall.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/vsyscall.c	2011-06-09 19:22:53.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/vsyscall.c	2011-06-15 19:26:18.000000000 +0400
@@ -165,13 +165,10 @@ static void __init map_vsyscall(void)
 
 static int __init vsyscall_init(void)
 {
-	if ((unsigned long) &vgettimeofday != VSYSCALL_ADDR(__NR_vgettimeofday))
-		panic("vgettimeofday link addr broken");
-	if ((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime))
-		panic("vtime link addr broken");
-	if (VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))
-		panic("fixmap first vsyscall %lx should be %lx", __fix_to_virt(VSYSCALL_FIRST_PAGE),
-		      VSYSCALL_ADDR(0));
+        BUG_ON(((unsigned long) &vgettimeofday != 
+		      VSYSCALL_ADDR(__NR_vgettimeofday)));
+	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
+	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
 	map_vsyscall();
 
 	return 0;
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/kernel/x8664_ksyms.c linux-2.6.9-ve023stab054/arch/x86_64/kernel/x8664_ksyms.c
--- linux-2.6.9-100.orig/arch/x86_64/kernel/x8664_ksyms.c	2011-06-09 19:22:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/kernel/x8664_ksyms.c	2011-06-15 19:26:21.000000000 +0400
@@ -36,6 +36,8 @@
 
 extern spinlock_t rtc_lock;
 
+EXPORT_SYMBOL(cpu_gdt_table);
+
 #ifdef CONFIG_SMP
 extern void __write_lock_failed(rwlock_t *rw);
 extern void __read_lock_failed(rwlock_t *rw);
diff -Nurap linux-2.6.9-100.orig/arch/x86_64/mm/fault.c linux-2.6.9-ve023stab054/arch/x86_64/mm/fault.c
--- linux-2.6.9-100.orig/arch/x86_64/mm/fault.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/arch/x86_64/mm/fault.c	2011-06-15 19:26:22.000000000 +0400
@@ -34,27 +34,6 @@
 #include <asm/kdebug.h>
 #include <asm-generic/sections.h>
 
-void bust_spinlocks(int yes)
-{
-	int loglevel_save = console_loglevel;
-	if (yes) {
-		oops_in_progress = 1;
-	} else {
-#ifdef CONFIG_VT
-		unblank_screen();
-#endif
-		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk will give klogd
-		 * a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;		/* NMI oopser may have shut the console up */
-		printk(" ");
-		console_loglevel = loglevel_save;
-	}
-}
-
 /* Sometimes the CPU reports invalid exceptions on prefetch.
    Check that here and ignore.
    Opcode checker based on code by Richard Brunner */
@@ -232,7 +211,7 @@ static noinline void pgtable_bad(unsigne
 }
 
 int page_fault_trace; 
-int exception_trace = 1;
+int exception_trace = 0;
 
 /*
  * This routine handles page faults.  It determines the address,
@@ -277,7 +256,7 @@ asmlinkage void do_page_fault(struct pt_
 		local_irq_enable();
 
 	if (unlikely(page_fault_trace))
-		printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+		ve_printk(VE_LOG, "pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
 		       regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); 
 
 	tsk = current;
@@ -320,7 +299,6 @@ asmlinkage void do_page_fault(struct pt_
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
 
- again:
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunatly, in the case of an
@@ -424,7 +402,7 @@ bad_area_nosemaphore:
 			return;
 
 		if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
-			printk(KERN_INFO
+			ve_printk(VE_LOG, KERN_INFO 
 		       "%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
 					tsk->comm, tsk->pid, address, regs->rip,
 					regs->rsp, error_code);
@@ -474,7 +452,6 @@ no_context:
 		printk(KERN_ALERT "Unable to handle kernel paging request");
 	printk(" at %016lx RIP: \n" KERN_ALERT,address);
 	printk_address(regs->rip);
-	printk("\n");
 	dump_pagetable(address);
 	__die("Oops", regs, error_code);
 	/* Executive summary in case the body of the oops scrolled away */
@@ -489,13 +466,14 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) { 
-		yield();
-		goto again;
-	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & 4)
-		do_exit(SIGKILL);
+	if (error_code & 4) {
+		/* 
+		 * 0-order allocation always success if something really 
+		 * fatal not happen: beancounter overdraft or OOM. Den 
+		 */
+		force_sig(SIGKILL, tsk);
+		return;
+	}
 	goto no_context;
 
 do_sigbus:
diff -Nurap linux-2.6.9-100.orig/drivers/base/bus.c linux-2.6.9-ve023stab054/drivers/base/bus.c
--- linux-2.6.9-100.orig/drivers/base/bus.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/base/bus.c	2011-06-15 19:26:22.000000000 +0400
@@ -230,7 +230,7 @@ int bus_for_each_drv(struct bus_type * b
  *	device_bind_driver - bind a driver to one device.
  *	@dev:	device.
  *
- *	Allow manual attachment of a driver to a deivce.
+ *	Allow manual attachment of a driver to a device.
  *	Caller must have already set @dev->driver.
  *
  *	Note that this does not modify the bus reference count
@@ -246,6 +246,7 @@ void device_bind_driver(struct device * 
 	list_add_tail(&dev->driver_list, &dev->driver->devices);
 	sysfs_create_link(&dev->driver->kobj, &dev->kobj,
 			  kobject_name(&dev->kobj));
+	sysfs_create_link(&dev->kobj, &dev->driver->kobj, "driver");
 }
 
 
@@ -368,6 +369,7 @@ void device_release_driver(struct device
 	struct device_driver * drv = dev->driver;
 	if (drv) {
 		sysfs_remove_link(&drv->kobj, kobject_name(&dev->kobj));
+		sysfs_remove_link(&dev->kobj, "driver");
 		list_del_init(&dev->driver_list);
 		device_detach_shutdown(dev);
 		if (drv->remove)
@@ -443,6 +445,7 @@ int bus_add_device(struct device * dev)
 		up_write(&dev->bus->subsys.rwsem);
 		device_add_attrs(bus, dev);
 		sysfs_create_link(&bus->devices.kobj, &dev->kobj, dev->bus_id);
+		sysfs_create_link(&dev->kobj, &dev->bus->subsys.kset.kobj, "bus");
 	}
 	return error;
 }
@@ -459,6 +462,7 @@ int bus_add_device(struct device * dev)
 void bus_remove_device(struct device * dev)
 {
 	if (dev->bus) {
+		sysfs_remove_link(&dev->kobj, "bus");
 		sysfs_remove_link(&dev->bus->devices.kobj, dev->bus_id);
 		device_remove_attrs(dev->bus, dev);
 		down_write(&dev->bus->subsys.rwsem);
@@ -514,7 +518,7 @@ int bus_add_driver(struct device_driver 
 
 	if (bus) {
 		pr_debug("bus %s: add driver %s\n", bus->name, drv->name);
-		error = kobject_set_name(&drv->kobj, drv->name);
+		error = kobject_set_name(&drv->kobj, "%s", drv->name);
 		if (error) {
 			put_bus(bus);
 			return error;
@@ -663,7 +667,7 @@ int bus_register(struct bus_type * bus)
 {
 	int retval;
 
-	retval = kobject_set_name(&bus->subsys.kset.kobj, bus->name);
+	retval = kobject_set_name(&bus->subsys.kset.kobj, "%s", bus->name);
 	if (retval)
 		goto out;
 
diff -Nurap linux-2.6.9-100.orig/drivers/base/class.c linux-2.6.9-ve023stab054/drivers/base/class.c
--- linux-2.6.9-100.orig/drivers/base/class.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/base/class.c	2011-06-15 19:26:22.000000000 +0400
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/err.h>
+#include <linux/kdev_t.h>
 #include "base.h"
 
 #define to_class_attr(_attr) container_of(_attr, struct class_attribute, attr)
@@ -70,8 +71,13 @@ static struct kobj_type ktype_class = {
 };
 
 /* Hotplug events for classes go to the class_obj subsys */
-static decl_subsys(class, &ktype_class, NULL);
+decl_subsys(class, &ktype_class, NULL);
 
+#ifndef CONFIG_VE
+#define visible_class_subsys class_subsys
+#else
+#define visible_class_subsys (*get_exec_env()->class_subsys)
+#endif
 
 int class_create_file(struct class * cls, const struct class_attribute * attr)
 {
@@ -140,11 +146,11 @@ int class_register(struct class * cls)
 
 	INIT_LIST_HEAD(&cls->children);
 	INIT_LIST_HEAD(&cls->interfaces);
-	error = kobject_set_name(&cls->subsys.kset.kobj, cls->name);
+	error = kobject_set_name(&cls->subsys.kset.kobj, "%s", cls->name);
 	if (error)
 		return error;
 
-	subsys_set_kset(cls, class_subsys);
+	subsys_set_kset(cls, visible_class_subsys);
 
 	error = subsystem_register(&cls->subsys);
 	if (!error) {
@@ -180,33 +186,22 @@ void class_device_remove_file(struct cla
 		sysfs_remove_file(&class_dev->kobj, &attr->attr);
 }
 
-static int class_device_dev_link(struct class_device * class_dev)
+int class_device_create_bin_file(struct class_device *class_dev,
+				 struct bin_attribute *attr)
 {
-	if (class_dev->dev)
-		return sysfs_create_link(&class_dev->kobj,
-					 &class_dev->dev->kobj, "device");
-	return 0;
-}
-
-static void class_device_dev_unlink(struct class_device * class_dev)
-{
-	sysfs_remove_link(&class_dev->kobj, "device");
-}
-
-static int class_device_driver_link(struct class_device * class_dev)
-{
-	if ((class_dev->dev) && (class_dev->dev->driver))
-		return sysfs_create_link(&class_dev->kobj,
-					 &class_dev->dev->driver->kobj, "driver");
-	return 0;
+	int error = -EINVAL;
+	if (class_dev)
+		error = sysfs_create_bin_file(&class_dev->kobj, attr);
+	return error;
 }
 
-static void class_device_driver_unlink(struct class_device * class_dev)
+void class_device_remove_bin_file(struct class_device *class_dev,
+				  struct bin_attribute *attr)
 {
-	sysfs_remove_link(&class_dev->kobj, "driver");
+	if (class_dev)
+		sysfs_remove_bin_file(&class_dev->kobj, attr);
 }
 
-
 static ssize_t
 class_device_attr_show(struct kobject * kobj, struct attribute * attr,
 		       char * buf)
@@ -283,9 +278,49 @@ static int class_hotplug(struct kset *ks
 			 int num_envp, char *buffer, int buffer_size)
 {
 	struct class_device *class_dev = to_class_dev(kobj);
+	int i = 0;
+	int length = 0;
 	int retval = 0;
 
 	pr_debug("%s - name = %s\n", __FUNCTION__, class_dev->class_id);
+
+	if (class_dev->dev) {
+		/* add physical device, backing this device  */
+		struct device *dev = class_dev->dev;
+		char *path = kobject_get_path(&dev->kobj, GFP_KERNEL);
+
+		add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
+				    &length, "PHYSDEVPATH=%s", path);
+		kfree(path);
+
+		if (dev->bus)
+			add_hotplug_env_var(envp, num_envp, &i,
+					    buffer, buffer_size, &length,
+					    "PHYSDEVBUS=%s", dev->bus->name);
+
+		if (dev->driver)
+			add_hotplug_env_var(envp, num_envp, &i,
+					    buffer, buffer_size, &length,
+					    "PHYSDEVDRIVER=%s", dev->driver->name);
+	}
+
+	if (MAJOR(class_dev->devt)) {
+		add_hotplug_env_var(envp, num_envp, &i,
+				    buffer, buffer_size, &length,
+				    "MAJOR=%u", MAJOR(class_dev->devt));
+
+		add_hotplug_env_var(envp, num_envp, &i,
+				    buffer, buffer_size, &length,
+				    "MINOR=%u", MINOR(class_dev->devt));
+	}
+
+	/* terminate, set to next free slot, shrink available space */
+	envp[i] = NULL;
+	envp = &envp[i];
+	num_envp -= i;
+	buffer = &buffer[length];
+	buffer_size -= length;
+
 	if (class_dev->class->hotplug) {
 		/* have the bus specific function add its stuff */
 		retval = class_dev->class->hotplug (class_dev, envp, num_envp,
@@ -305,8 +340,13 @@ static struct kset_hotplug_ops class_hot
 	.hotplug =	class_hotplug,
 };
 
-static decl_subsys(class_obj, &ktype_class_device, &class_hotplug_ops);
+decl_subsys(class_obj, &ktype_class_device, &class_hotplug_ops);
 
+#ifndef CONFIG_VE
+#define visible_class_obj_subsys class_obj_subsys
+#else
+#define visible_class_obj_subsys (*get_exec_env()->class_obj_subsys)
+#endif
 
 static int class_device_add_attrs(struct class_device * cd)
 {
@@ -341,9 +381,22 @@ static void class_device_remove_attrs(st
 	}
 }
 
+static ssize_t show_dev(struct class_device *class_dev, char *buf)
+{
+	return print_dev_t(buf, class_dev->devt);
+}
+static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
+
+static ssize_t store_uevent(struct class_device *class_dev,
+	const char *buf, size_t count)
+{
+	kobject_hotplug(&class_dev->kobj, KOBJ_ADD);
+	return count;
+}
+
 void class_device_initialize(struct class_device *class_dev)
 {
-	kobj_set_kset_s(class_dev, class_obj_subsys);
+	kobj_set_kset_s(class_dev, visible_class_obj_subsys);
 	kobject_init(&class_dev->kobj);
 	INIT_LIST_HEAD(&class_dev->node);
 }
@@ -369,7 +422,7 @@ int class_device_add(struct class_device
 		 class_dev->class_id);
 
 	/* first, register with generic layer. */
-	kobject_set_name(&class_dev->kobj, class_dev->class_id);
+	kobject_set_name(&class_dev->kobj, "%s", class_dev->class_id);
 	if (parent)
 		class_dev->kobj.parent = &parent->subsys.kset.kobj;
 
@@ -385,9 +438,20 @@ int class_device_add(struct class_device
 				class_intf->add(class_dev);
 		up_write(&parent->subsys.rwsem);
 	}
+	/* add "uevent" sysfs attr to re-emit device hotplug event */
+	class_dev->uevent_attr.attr.name = "uevent";
+	class_dev->uevent_attr.attr.mode = S_IWUSR;
+	class_dev->uevent_attr.attr.owner = NULL;
+	class_dev->uevent_attr.store = store_uevent;
+	class_device_create_file(class_dev, &class_dev->uevent_attr);
+
+	if (MAJOR(class_dev->devt))
+		class_device_create_file(class_dev, &class_device_attr_dev);
+
 	class_device_add_attrs(class_dev);
-	class_device_dev_link(class_dev);
-	class_device_driver_link(class_dev);
+	if (class_dev->dev)
+		sysfs_create_link(&class_dev->kobj,
+				  &class_dev->dev->kobj, "device");
 
  register_done:
 	if (error && parent)
@@ -416,8 +480,9 @@ void class_device_del(struct class_devic
 		up_write(&parent->subsys.rwsem);
 	}
 
-	class_device_dev_unlink(class_dev);
-	class_device_driver_unlink(class_dev);
+	if (class_dev->dev)
+		sysfs_remove_link(&class_dev->kobj, "device");
+	class_device_remove_file(class_dev, &class_dev->uevent_attr);
 	class_device_remove_attrs(class_dev);
 
 	kobject_del(&class_dev->kobj);
@@ -641,10 +706,19 @@ error:
 	return ERR_PTR(retval);
 }
 
+void prepare_sysfs_classes(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->class_subsys = &class_subsys;
+	get_ve0()->class_obj_subsys = &class_obj_subsys;
+#endif
+}
+
 int __init classes_init(void)
 {
 	int retval;
 
+	prepare_sysfs_classes();
 	retval = subsystem_register(&class_subsys);
 	if (retval)
 		return retval;
@@ -676,6 +750,11 @@ EXPORT_SYMBOL_GPL(class_device_put);
 EXPORT_SYMBOL_GPL(class_device_create);
 EXPORT_SYMBOL_GPL(class_device_create_file);
 EXPORT_SYMBOL_GPL(class_device_remove_file);
+EXPORT_SYMBOL_GPL(class_device_create_bin_file);
+EXPORT_SYMBOL_GPL(class_device_remove_bin_file);
 
 EXPORT_SYMBOL_GPL(class_interface_register);
 EXPORT_SYMBOL_GPL(class_interface_unregister);
+
+EXPORT_SYMBOL(class_subsys);
+EXPORT_SYMBOL(class_obj_subsys);
diff -Nurap linux-2.6.9-100.orig/drivers/base/class_simple.c linux-2.6.9-ve023stab054/drivers/base/class_simple.c
--- linux-2.6.9-100.orig/drivers/base/class_simple.c	2011-06-09 19:22:45.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/base/class_simple.c	2011-06-15 19:26:22.000000000 +0400
@@ -10,23 +10,24 @@
 
 #include <linux/config.h>
 #include <linux/device.h>
-#include <linux/kdev_t.h>
 #include <linux/err.h>
 
 struct class_simple {
-	struct class_device_attribute attr;
 	struct class class;
 };
 #define to_class_simple(d) container_of(d, struct class_simple, class)
 
 struct simple_dev {
 	struct list_head node;
-	dev_t dev;
 	struct class_device class_dev;
 };
 #define to_simple_dev(d) container_of(d, struct simple_dev, class_dev)
 
+#ifdef CONFIG_VE
+#define simple_dev_list	(get_exec_env()->_simple_dev_list)
+#else
 static LIST_HEAD(simple_dev_list);
+#endif
 static spinlock_t simple_dev_list_lock = SPIN_LOCK_UNLOCKED;
 
 static void release_simple_dev(struct class_device *class_dev)
@@ -35,12 +36,6 @@ static void release_simple_dev(struct cl
 	kfree(s_dev);
 }
 
-static ssize_t show_dev(struct class_device *class_dev, char *buf)
-{
-	struct simple_dev *s_dev = to_simple_dev(class_dev);
-	return print_dev_t(buf, s_dev->dev);
-}
-
 static void class_simple_release(struct class *class)
 {
 	struct class_simple *cs = to_class_simple(class);
@@ -75,12 +70,6 @@ struct class_simple *class_simple_create
 	cs->class.class_release = class_simple_release;
 	cs->class.release = release_simple_dev;
 
-	cs->attr.attr.name = "dev";
-	cs->attr.attr.mode = S_IRUGO;
-	cs->attr.attr.owner = owner;
-	cs->attr.show = show_dev;
-	cs->attr.store = NULL;
-
 	retval = class_register(&cs->class);
 	if (retval)
 		goto error;
@@ -143,7 +132,7 @@ struct class_device *class_simple_device
 	}
 	memset(s_dev, 0x00, sizeof(*s_dev));
 
-	s_dev->dev = dev;
+	s_dev->class_dev.devt = dev;
 	s_dev->class_dev.dev = device;
 	s_dev->class_dev.class = &cs->class;
 
@@ -154,11 +143,6 @@ struct class_device *class_simple_device
 	if (retval)
 		goto error;
 
-	class_device_create_file(&s_dev->class_dev, &cs->attr);
-
-	/* make sure we create a hotplug event after the sysfs file is created */
-	kobject_hotplug("add", &s_dev->class_dev.kobj);
-
 	spin_lock(&simple_dev_list_lock);
 	list_add(&s_dev->node, &simple_dev_list);
 	spin_unlock(&simple_dev_list_lock);
@@ -203,7 +187,7 @@ void class_simple_device_remove(dev_t de
 
 	spin_lock(&simple_dev_list_lock);
 	list_for_each_entry(s_dev, &simple_dev_list, node) {
-		if (s_dev->dev == dev) {
+		if (s_dev->class_dev.devt == dev) {
 			found = 1;
 			break;
 		}
diff -Nurap linux-2.6.9-100.orig/drivers/base/core.c linux-2.6.9-ve023stab054/drivers/base/core.c
--- linux-2.6.9-100.orig/drivers/base/core.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/base/core.c	2011-06-15 19:26:22.000000000 +0400
@@ -116,9 +116,30 @@ static int dev_hotplug(struct kset *kset
 			int num_envp, char *buffer, int buffer_size)
 {
 	struct device *dev = to_dev(kobj);
+	int i = 0;
+	int length = 0;
 	int retval = 0;
 
-	if (dev->bus->hotplug) {
+	/* add bus name of physical device */
+	if (dev->bus)
+		add_hotplug_env_var(envp, num_envp, &i,
+				    buffer, buffer_size, &length,
+				    "PHYSDEVBUS=%s", dev->bus->name);
+
+	/* add driver name of physical device */
+	if (dev->driver)
+		add_hotplug_env_var(envp, num_envp, &i,
+				    buffer, buffer_size, &length,
+				    "PHYSDEVDRIVER=%s", dev->driver->name);
+
+	/* terminate, set to next free slot, shrink available space */
+	envp[i] = NULL;
+	envp = &envp[i];
+	num_envp -= i;
+	buffer = &buffer[length];
+	buffer_size -= length;
+
+	if (dev->bus && dev->bus->hotplug) {
 		/* have the bus specific function add its stuff */
 		retval = dev->bus->hotplug (dev, envp, num_envp, buffer, buffer_size);
 			if (retval) {
@@ -136,6 +157,13 @@ static struct kset_hotplug_ops device_ho
 	.hotplug =	dev_hotplug,
 };
 
+static ssize_t store_uevent(struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	kobject_hotplug(&dev->kobj, KOBJ_ADD);
+	return count;
+}
+
 /**
  *	device_subsys - structure to be registered with kobject core.
  */
@@ -237,12 +265,18 @@ int device_add(struct device *dev)
 	pr_debug("DEV: registering device: ID = '%s'\n", dev->bus_id);
 
 	/* first, register with generic layer. */
-	kobject_set_name(&dev->kobj, dev->bus_id);
+	kobject_set_name(&dev->kobj, "%s", dev->bus_id);
 	if (parent)
 		dev->kobj.parent = &parent->kobj;
 
 	if ((error = kobject_add(&dev->kobj)))
 		goto Error;
+
+	dev->uevent_attr.attr.name = "uevent";
+	dev->uevent_attr.attr.mode = S_IWUSR;
+	dev->uevent_attr.store = store_uevent;
+	device_create_file(dev, &dev->uevent_attr);
+
 	if ((error = device_pm_add(dev)))
 		goto PMError;
 	if ((error = bus_add_device(dev)))
@@ -335,6 +369,7 @@ void device_del(struct device * dev)
 	if (parent)
 		list_del_init(&dev->node);
 	up_write(&devices_subsys.rwsem);
+	device_remove_file(dev, &dev->uevent_attr);
 
 	/* Notify the platform of the removal, in case they
 	 * need to do anything...
diff -Nurap linux-2.6.9-100.orig/drivers/base/firmware_class.c linux-2.6.9-ve023stab054/drivers/base/firmware_class.c
--- linux-2.6.9-100.orig/drivers/base/firmware_class.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/base/firmware_class.c	2011-06-15 19:26:22.000000000 +0400
@@ -94,19 +94,17 @@ firmware_class_hotplug(struct class_devi
 		       int num_envp, char *buffer, int buffer_size)
 {
 	struct firmware_priv *fw_priv = class_get_devdata(class_dev);
-	int i = 0;
-	char *scratch = buffer;
+	int i = 0, len = 0;
 
 	if (!test_bit(FW_STATUS_READY, &fw_priv->status))
 		return -ENODEV;
 
-	if (buffer_size < (FIRMWARE_NAME_MAX + 10))
-		return -ENOMEM;
-	if (num_envp < 1)
+	if (add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &len,
+			"FIRMWARE=%s", fw_priv->fw_id))
 		return -ENOMEM;
 
-	envp[i++] = scratch;
-	scratch += sprintf(scratch, "FIRMWARE=%s", fw_priv->fw_id) + 1;
+	envp[i] = NULL;
+
 	return 0;
 }
 
@@ -422,7 +420,7 @@ request_firmware(const struct firmware *
 		add_timer(&fw_priv->timeout);
 	}
 
-	kobject_hotplug("add", &class_dev->kobj);
+	kobject_hotplug(&class_dev->kobj, KOBJ_ADD);
 	wait_for_completion(&fw_priv->completion);
 	set_bit(FW_STATUS_DONE, &fw_priv->status);
 
diff -Nurap linux-2.6.9-100.orig/drivers/block/floppy.c linux-2.6.9-ve023stab054/drivers/block/floppy.c
--- linux-2.6.9-100.orig/drivers/block/floppy.c	2004-10-19 01:53:22.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/block/floppy.c	2011-06-15 19:26:19.000000000 +0400
@@ -3774,7 +3774,7 @@ static int floppy_open(struct inode *ino
 	 * Needed so that programs such as fdrawcmd still can work on write
 	 * protected disks */
 	if (filp->f_mode & 2
-	    || permission(filp->f_dentry->d_inode, 2, NULL) == 0)
+	    || permission(filp->f_dentry->d_inode, 2, NULL, NULL) == 0)
 		filp->private_data = (void *)8;
 
 	if (UFDCS->rawcmd == 1)
diff -Nurap linux-2.6.9-100.orig/drivers/block/genhd.c linux-2.6.9-ve023stab054/drivers/block/genhd.c
--- linux-2.6.9-100.orig/drivers/block/genhd.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/block/genhd.c	2011-06-15 19:26:22.000000000 +0400
@@ -17,6 +17,8 @@
 #include <linux/hash.h>
 
 static struct subsystem block_subsys;
+struct subsystem *get_block_subsys(void) {return &block_subsys;}
+EXPORT_SYMBOL(get_block_subsys);
 
 /*
  * Can be deleted altogether. Later.
@@ -345,6 +347,7 @@ subsys_initcall(device_init);
 struct disk_attribute {
 	struct attribute attr;
 	ssize_t (*show)(struct gendisk *, char *);
+	ssize_t (*store)(struct gendisk *, const char *, size_t);
 };
 
 static ssize_t disk_attr_show(struct kobject *kobj, struct attribute *attr,
@@ -360,10 +363,31 @@ static ssize_t disk_attr_show(struct kob
 	return ret;
 }
 
+static ssize_t disk_attr_store(struct kobject * kobj, struct attribute * attr,
+	const char *page, size_t count)
+{
+	struct gendisk *disk = to_disk(kobj);
+	struct disk_attribute *disk_attr =
+	container_of(attr,struct disk_attribute,attr);
+	ssize_t ret = 0;
+
+	if (disk_attr->store)
+		ret = disk_attr->store(disk, page, count);
+	return ret;
+}
+
 static struct sysfs_ops disk_sysfs_ops = {
 	.show	= &disk_attr_show,
+	.store  = &disk_attr_store,
 };
 
+static ssize_t disk_uevent_store(struct gendisk * disk,
+	const char *buf, size_t count)
+{
+	kobject_hotplug(&disk->kobj, KOBJ_ADD);
+	return count;
+}
+
 static ssize_t disk_dev_read(struct gendisk * disk, char *page)
 {
 	dev_t base = MKDEV(disk->major, disk->first_minor); 
@@ -405,6 +429,10 @@ static ssize_t disk_stats_read(struct ge
 		jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
 		jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
 }
+static struct disk_attribute disk_attr_uevent = {
+	.attr = {.name = "uevent", .mode = S_IWUSR },
+	.store  = disk_uevent_store
+};
 static struct disk_attribute disk_attr_dev = {
 	.attr = {.name = "dev", .mode = S_IRUGO },
 	.show	= disk_dev_read
@@ -427,6 +455,7 @@ static struct disk_attribute disk_attr_s
 };
 
 static struct attribute * default_attrs[] = {
+	&disk_attr_uevent.attr,
 	&disk_attr_dev.attr,
 	&disk_attr_range.attr,
 	&disk_attr_removable.attr,
@@ -459,8 +488,67 @@ static int block_hotplug_filter(struct k
 	return ((ktype == &ktype_block) || (ktype == &ktype_part));
 }
 
+static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
+			 int num_envp, char *buffer, int buffer_size)
+{
+	struct kobj_type *ktype = get_ktype(kobj);
+	struct device *physdev;
+	struct gendisk *disk;
+	struct hd_struct *part;
+	int length = 0;
+	int i = 0;
+
+	if (ktype == &ktype_block) {
+		disk = container_of(kobj, struct gendisk, kobj);
+		add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
+				    &length, "MINOR=%u", disk->first_minor);
+	} else if (ktype == &ktype_part) {
+		disk = container_of(kobj->parent, struct gendisk, kobj);
+		part = container_of(kobj, struct hd_struct, kobj);
+		add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
+				    &length, "MINOR=%u",
+				    disk->first_minor + part->partno);
+	} else 
+		return 0;
+
+	add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &length,
+			    "MAJOR=%u", disk->major);
+
+	/* add physical device, backing this device  */
+	physdev = disk->driverfs_dev;
+	if (physdev) {
+		char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL);
+
+		add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
+				    &length, "PHYSDEVPATH=%s", path);
+		kfree(path);
+
+		if (physdev->bus)
+			add_hotplug_env_var(envp, num_envp, &i,
+					    buffer, buffer_size, &length,
+					    "PHYSDEVBUS=%s",
+					    physdev->bus->name);
+
+		if (physdev->driver)
+			add_hotplug_env_var(envp, num_envp, &i,
+					    buffer, buffer_size, &length,
+					    "PHYSDEVDRIVER=%s",
+					    physdev->driver->name);
+	}
+
+	/* terminate, set to next free slot, shrink available space */
+	envp[i] = NULL;
+	envp = &envp[i];
+	num_envp -= i;
+	buffer = &buffer[length];
+	buffer_size -= length;
+
+	return 0;
+}
+
 static struct kset_hotplug_ops block_hotplug_ops = {
-	.filter	= block_hotplug_filter,
+	.filter		= block_hotplug_filter,
+	.hotplug	= block_hotplug,
 };
 
 /* declare block_subsys. */
diff -Nurap linux-2.6.9-100.orig/drivers/block/ll_rw_blk.c linux-2.6.9-ve023stab054/drivers/block/ll_rw_blk.c
--- linux-2.6.9-100.orig/drivers/block/ll_rw_blk.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/block/ll_rw_blk.c	2011-06-15 19:26:18.000000000 +0400
@@ -2419,7 +2419,7 @@ EXPORT_SYMBOL(__blk_attempt_remerge);
 static int __make_request(request_queue_t *q, struct bio *bio)
 {
 	struct request *req, *freereq = NULL;
-	int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err;
+	int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
 	sector_t sector;
 
 	sector = bio->bi_sector;
@@ -2467,6 +2467,7 @@ again:
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_back_merge(q, req))
 				elv_merged_request(q, req);
+			sync = bio_sync(bio);
 			goto out;
 
 		case ELEVATOR_FRONT_MERGE:
@@ -2493,6 +2494,7 @@ again:
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_front_merge(q, req))
 				elv_merged_request(q, req);
+			sync = bio_sync(bio);
 			goto out;
 
 		/*
@@ -2558,11 +2560,12 @@ get_rq:
 	req->rq_disk = bio->bi_bdev->bd_disk;
 	req->start_time = jiffies;
 
+	sync = bio_sync(bio);
 	add_request(q, req);
 out:
 	if (freereq)
 		__blk_put_request(q, freereq);
-	if (bio_sync(bio))
+	if (sync)
 		__generic_unplug_device(q);
 
 	spin_unlock_irq(q->queue_lock);
diff -Nurap linux-2.6.9-100.orig/drivers/char/keyboard.c linux-2.6.9-ve023stab054/drivers/char/keyboard.c
--- linux-2.6.9-100.orig/drivers/char/keyboard.c	2004-10-19 01:55:36.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/char/keyboard.c	2011-06-15 19:26:22.000000000 +0400
@@ -152,6 +152,7 @@ unsigned char kbd_sysrq_xlate[KEY_MAX] =
         "\r\000/";                                      /* 0x60 - 0x6f */
 static int sysrq_down;
 #endif
+int sysrq_key_scancode = KEY_SYSRQ;
 static int sysrq_alt;
 
 /*
@@ -990,7 +991,7 @@ static int emulate_raw(struct vc_data *v
 			return 0;
 	} 
 
-	if (keycode == KEY_SYSRQ && sysrq_alt) {
+	if ((keycode == sysrq_key_scancode || keycode == KEY_SYSRQ) && sysrq_alt) {
 		put_queue(vc, 0x54 | up_flag);
 		return 0;
 	}
@@ -1072,11 +1073,12 @@ void kbd_keycode(unsigned int keycode, i
 				printk(KERN_WARNING "keyboard.c: can't emulate rawmode for keycode %d\n", keycode);
 
 #ifdef CONFIG_MAGIC_SYSRQ	       /* Handle the SysRq Hack */
-	if (keycode == KEY_SYSRQ && (sysrq_down || (down == 1 && sysrq_alt))) {
+	if ((keycode == sysrq_key_scancode || keycode == KEY_SYSRQ) &&
+				(sysrq_down || (down == 1 && sysrq_alt))) {
 		sysrq_down = down;
 		return;
 	}
-	if (sysrq_down && down && !rep) {
+	if ((sysrq_down || sysrq_eat_all()) && down && !rep) {
 		handle_sysrq(kbd_sysrq_xlate[keycode], regs, tty);
 		return;
 	}
diff -Nurap linux-2.6.9-100.orig/drivers/char/mem.c linux-2.6.9-ve023stab054/drivers/char/mem.c
--- linux-2.6.9-100.orig/drivers/char/mem.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/char/mem.c	2011-06-15 19:26:19.000000000 +0400
@@ -257,8 +257,6 @@ static ssize_t read_kmem(struct file *fi
 	ssize_t virtr = 0;
 	char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
 	
-	return -EPERM;
-		
 	if (p < (unsigned long) high_memory) {
 		read = count;
 		if (count > (unsigned long) high_memory - p)
@@ -670,6 +668,7 @@ static const struct {
 	struct file_operations	*fops;
 } devlist[] = { /* list of minor devices */
 	{1, "mem",     S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
+	{2, "kmem",    S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
 	{3, "null",    S_IRUGO | S_IWUGO,           &null_fops},
 #if defined(CONFIG_ISA) || !defined(__mc68000__)
 	{4, "port",    S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
diff -Nurap linux-2.6.9-100.orig/drivers/char/n_tty.c linux-2.6.9-ve023stab054/drivers/char/n_tty.c
--- linux-2.6.9-100.orig/drivers/char/n_tty.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/char/n_tty.c	2011-06-15 19:26:18.000000000 +0400
@@ -1151,7 +1151,7 @@ static inline int copy_from_read_buf(str
 
 {
 	int retval;
-	ssize_t n;
+	size_t n;
 	unsigned long flags;
 
 	retval = 0;
diff -Nurap linux-2.6.9-100.orig/drivers/char/pty.c linux-2.6.9-ve023stab054/drivers/char/pty.c
--- linux-2.6.9-100.orig/drivers/char/pty.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/char/pty.c	2011-06-15 19:26:20.000000000 +0400
@@ -32,16 +32,36 @@
 #include <asm/bitops.h>
 #include <linux/devpts_fs.h>
 
+#include <ub/ub_misc.h>
+
 /* These are global because they are accessed in tty_io.c */
 #ifdef CONFIG_UNIX98_PTYS
 struct tty_driver *ptm_driver;
 struct tty_driver *pts_driver;
+EXPORT_SYMBOL(ptm_driver);
+EXPORT_SYMBOL(pts_driver);
+
+#ifdef CONFIG_VE
+#define ve_ptm_driver	(get_exec_env()->ptm_driver)
+#else
+#define ve_ptm_driver	ptm_driver
+#endif
+
+void prepare_pty(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->ptm_driver = ptm_driver;
+	/* don't clean ptm_driver and co. here, they are used in vecalls.c */
+#endif
+}
 #endif
 
 static void pty_close(struct tty_struct * tty, struct file * filp)
 {
 	if (!tty)
 		return;
+
+	ub_pty_uncharge(tty);
 	if (tty->driver->subtype == PTY_TYPE_MASTER) {
 		if (tty->count > 1)
 			printk("master pty_close: count = %d!!\n", tty->count);
@@ -61,8 +81,12 @@ static void pty_close(struct tty_struct 
 	if (tty->driver->subtype == PTY_TYPE_MASTER) {
 		set_bit(TTY_OTHER_CLOSED, &tty->flags);
 #ifdef CONFIG_UNIX98_PTYS
-		if (tty->driver == ptm_driver)
+		if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+			struct ve_struct *old_env;
+			old_env = set_exec_env(VE_OWNER_TTY(tty));
 			devpts_pty_kill(tty->index);
+			set_exec_env(old_env);
+		}
 #endif
 		tty_vhangup(tty->link);
 	}
@@ -235,6 +259,8 @@ static int pty_open(struct tty_struct *t
 
 	if (!tty || !tty->link)
 		goto out;
+	if (ub_pty_charge(tty))
+		goto out;
 
 	retval = -EIO;
 	if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
@@ -271,7 +297,10 @@ static struct tty_operations pty_ops = {
 
 /* Traditional BSD devices */
 #ifdef CONFIG_LEGACY_PTYS
-static struct tty_driver *pty_driver, *pty_slave_driver;
+struct tty_driver *pty_driver, *pty_slave_driver;
+
+EXPORT_SYMBOL(pty_driver);
+EXPORT_SYMBOL(pty_slave_driver);
 
 static int pty_bsd_ioctl(struct tty_struct *tty, struct file *file,
 			 unsigned int cmd, unsigned long arg)
@@ -447,6 +476,7 @@ static int __init pty_init(void)
 {
 	legacy_pty_init();
 	unix98_pty_init();
+	prepare_pty();
 	return 0;
 }
 module_init(pty_init);
diff -Nurap linux-2.6.9-100.orig/drivers/char/sysrq.c linux-2.6.9-ve023stab054/drivers/char/sysrq.c
--- linux-2.6.9-100.orig/drivers/char/sysrq.c	2011-06-09 19:22:53.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/char/sysrq.c	2011-06-15 19:26:22.000000000 +0400
@@ -31,10 +31,14 @@
 #include <linux/suspend.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>		/* for fsync_bdev() */
+#include <linux/kallsyms.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
 
 #include <linux/spinlock.h>
 
 #include <asm/ptrace.h>
+#include <asm/uaccess.h>
 
 extern void reset_vc(unsigned int);
 
@@ -144,16 +148,426 @@ static struct sysrq_key_op sysrq_mountro
 	.action_msg	= "Emergency Remount R/O",
 };
 
+#ifdef CONFIG_SYSRQ_DEBUG
+/*
+ * Alt-SysRq debugger
+ * Implemented functions:
+ *	dumping memory
+ *	resolving symbols
+ *	writing memory
+ *	quitting :)
+ */
+
+void print_addr(unsigned long val)
+{
+	if (__kernel_text_address(val))
+		printk("[<%0*lx>] ", sizeof(long) * 2, val);
+	else
+		printk("  %0*lx   ", sizeof(long) * 2, val);
+}
+
+/* Memory accessing routines */
+#define DUMP_BYTES	(22 * 16)	/* Exact 22 lines for all arch's */
+unsigned long *dumpmem_addr;
+
+/* VALS = 2 for 64bit architectures and 4 otherwise */
+#define VALS	(16 / sizeof(long))
+
+static void dump_mem(int bytes)
+{
+	int n, errmask, i;
+	unsigned long val[VALS];
+	int zeroes;
+	mm_segment_t old_fs;
+
+	zeroes = 0;
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	for (n = 0; n < bytes; n += VALS * sizeof(unsigned long)) {
+		/* read values and collect errors */
+		errmask = 0;
+		for (i = 0; i < VALS; i++)
+			if (__get_user(val[i], dumpmem_addr + i))
+				errmask |= 1 << i;
+
+		/* If all values are equal to zero, remember it and continue */
+		if (!errmask) {
+			for (i = 0; i < VALS; i++)
+				if (val[i])
+					break;
+			if (i == VALS) {
+				/* All values are equal to 0 */
+				zeroes++;
+				dumpmem_addr += VALS;
+				continue;
+			}
+		}
+
+		/* Something is found */
+
+		if (zeroes) {
+			printk(" ...\n");
+			zeroes = 0;
+		}
+
+		printk("0x%p: ", dumpmem_addr);
+		for (i = 0; i < VALS; i++) {
+			if (errmask & (1 << i))
+				printk("  %*s   ", sizeof(long) * 2, "?");
+			else
+				print_addr(val[i]);
+			dumpmem_addr++;
+		}
+		printk("\n");
+	}
+	if (zeroes)
+		printk(" ...\n");
+	set_fs(old_fs);
+}
+
+static unsigned long *writemem_addr;
+
+static void write_mem(unsigned long val)
+{
+	mm_segment_t old_fs;
+	unsigned long old_val;
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	if (__get_user(old_val, writemem_addr))
+		goto err;
+	printk("Changing [0x%p] %08lX to %08lX\n", writemem_addr, old_val, val);
+	__put_user(val, writemem_addr);
+err:
+	set_fs(old_fs);
+}
+
+/* reading user input */
+#define NAME_LEN (64)
+static struct {
+	unsigned long hex;
+	char name[NAME_LEN + 1];
+	void (*entered)(void);
+} debug_input;
+
+static void debug_read_hex(int key)
+{
+	static int entered = 0;
+	int val;
+
+	if (key >= '0' && key <= '9')
+		val = key - '0';
+	else if (key >= 'a' && key <= 'f')
+		val = key - 'a' + 0xa;
+	else
+		return;
+
+	entered++;
+	debug_input.hex = (debug_input.hex << 4) + val;
+	printk("%c", key);
+	if (entered != sizeof(unsigned long) * 2)
+		return;
+
+	printk("\n");
+	entered = 0;
+	debug_input.entered();
+}
+
+static void debug_read_string(int key)
+{
+	static int pos;
+	static int shift;
+
+	if (key == 0) {
+		/* actually key == 0 not only for shift */
+		shift = 1;
+		return;
+	}
+
+	if (key == 0x0d) /* enter */
+		goto finish;
+
+	if (key >= 'a' && key <= 'z') {
+		if (shift)
+			key = key - 'a' + 'A';
+		goto correct;
+	} 
+	if (key == '-') {
+		if (shift)
+			key = '_';
+		goto correct;
+	}
+	if (key >= '0' && key <= '9')
+		goto correct;
+	return;
+
+correct:
+	debug_input.name[pos] = key;
+	pos++;
+	shift = 0;
+	printk("%c", key);
+	if (pos != NAME_LEN)
+		return;
+
+finish:
+	printk("\n");
+	pos = 0;
+	shift = 0;
+	debug_input.entered();
+	memset(debug_input.name, 0, NAME_LEN);
+}
+
+static int sysrq_debug_mode;
+#define DEBUG_SELECT_ACTION	1
+#define DEBUG_READ_INPUT	2
+static struct sysrq_key_op *debug_sysrq_key_table[];
+static void (*handle_debug_input)(int key);
+static void swap_opts(struct sysrq_key_op **);
+#define PROMPT	"> "
+
+int sysrq_eat_all(void)
+{
+	return sysrq_debug_mode;
+}
+
+static inline void debug_switch_read_input(void (*fn_read)(int),
+		void (*fn_fini)(void))
+{
+	WARN_ON(fn_read == NULL || fn_fini == NULL);
+	debug_input.entered = fn_fini;
+	handle_debug_input = fn_read;
+	sysrq_debug_mode = DEBUG_READ_INPUT;
+}
+
+static inline void debug_switch_select_action(void)
+{
+	sysrq_debug_mode = DEBUG_SELECT_ACTION;
+	handle_debug_input = NULL;
+	printk(PROMPT);
+}
+
+/* handle key press in debug mode */
+static void __handle_debug(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	if (sysrq_debug_mode == DEBUG_SELECT_ACTION) {
+		__handle_sysrq(key, pt_regs, tty);
+		if (sysrq_debug_mode)
+			printk(PROMPT);
+	} else {
+		__sysrq_lock_table();
+		handle_debug_input(key);
+		__sysrq_unlock_table();
+	}
+}
+
+/* dump memory */
+static void debug_dumpmem_addr_entered(void)
+{
+	dumpmem_addr = (unsigned long *)debug_input.hex;
+	dump_mem(DUMP_BYTES);
+	debug_switch_select_action();
+}
+
+static void sysrq_handle_dumpmem(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	debug_switch_read_input(debug_read_hex, debug_dumpmem_addr_entered);
+}
+static struct sysrq_key_op sysrq_debug_dumpmem = {
+	.handler	= sysrq_handle_dumpmem,
+	.help_msg	= "Dump memory\n",
+	.action_msg	= "Enter address",
+};
+
+static void sysrq_handle_dumpnext(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	dump_mem(DUMP_BYTES);
+}
+static struct sysrq_key_op sysrq_debug_dumpnext = {
+	.handler	= sysrq_handle_dumpnext,
+	.help_msg	= "dump neXt\n",
+	.action_msg	= "",
+};
+
+/* resolve symbol */
+static void debug_resolve_name_entered(void)
+{
+	unsigned long sym_addr;
+
+	sym_addr = kallsyms_lookup_name(debug_input.name);
+	printk("%s: %08lX\n", debug_input.name, sym_addr);
+	if (sym_addr) {
+		printk("Now you can dump it via X\n");
+		dumpmem_addr = (unsigned long *)sym_addr;
+	}
+	debug_switch_select_action();
+}
+
+static void sysrq_handle_resolve(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	debug_switch_read_input(debug_read_string, debug_resolve_name_entered);
+}
+static struct sysrq_key_op sysrq_debug_resolve = {
+	.handler	= sysrq_handle_resolve,
+	.help_msg	= "Resolve symbol\n",
+	.action_msg	= "Enter symbol name",
+};
+
+/* write memory */
+static void debug_writemem_val_entered(void)
+{
+	write_mem(debug_input.hex);
+	debug_switch_select_action();
+}
+
+static void debug_writemem_addr_entered(void)
+{
+	mm_segment_t old_fs;
+	unsigned long val;
+
+	writemem_addr = (unsigned long *)debug_input.hex;
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	if (!__get_user(val, writemem_addr))
+		printk(" [0x%p] = %08lX\n", writemem_addr, val);
+	set_fs(old_fs);
+	debug_switch_read_input(debug_read_hex, debug_writemem_val_entered);
+}
+
+static void sysrq_handle_writemem(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	debug_switch_read_input(debug_read_hex, debug_writemem_addr_entered);
+}
+static struct sysrq_key_op sysrq_debug_writemem = {
+	.handler	= sysrq_handle_writemem,
+	.help_msg	= "Write memory\n",
+	.action_msg	= "Enter address and then value",
+};
+
+/* switch to debug mode */
+static void sysrq_handle_debug(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	swap_opts(debug_sysrq_key_table);
+	printk("Welcome sysrq debugging mode\n"
+			"Press H for help\n");
+	debug_switch_select_action();
+}
+static struct sysrq_key_op sysrq_debug_enter = {
+	.handler	= sysrq_handle_debug,
+	.help_msg	= "start Degugging",
+	.action_msg	= "Select desired action",
+};
+
+/* quit debug mode */
+static void sysrq_handle_quit(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	swap_opts(NULL);
+	sysrq_debug_mode = 0;
+}
+static struct sysrq_key_op sysrq_debug_quit = {
+	.handler	= sysrq_handle_quit,
+	.help_msg	= "Quit debug mode\n",
+	.action_msg	= "Thank you for using debugger",
+};
+#endif
+
 /* END SYNC SYSRQ HANDLERS BLOCK */
 
 
 /* SHOW SYSRQ HANDLERS BLOCK */
 
+static void show_regs_noregs(void)
+{
+	struct task_struct *p;
+
+	p = current;
+	printk("\n");
+	printk("Pid: %d, comm: %.20s, CPU: %d, VCPU: %d:%d\n",
+			p->pid, p->comm,
+			smp_processor_id(), task_vsched_id(p), task_cpu(p));
+	dump_stack();
+}
+
+#ifdef CONFIG_SMP
+
+static spinlock_t show_regs_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t show_regs_ser = SPIN_LOCK_UNLOCKED;
+struct show_regs_state {
+	struct timer_list	timer;
+};
+static DEFINE_PER_CPU(struct show_regs_state, show_regs_state);
+
+static void show_regs_other(unsigned long cpu)
+{
+	if (!spin_trylock(&show_regs_ser)) {
+		mod_timer(&per_cpu(show_regs_state, cpu).timer, jiffies + 1);
+		return;
+	}
+
+	if (cpu == smp_processor_id()) /* CPU might be dead */
+		show_regs_noregs();
+	spin_unlock(&show_regs_ser);
+}
+
+static void schedule_show_regs(void)
+{
+	int cpu, i;
+	struct timer_list *t;
+	unsigned long flags;
+
+	cpu = smp_processor_id();
+	spin_lock_irqsave(&show_regs_lock, flags);
+	for_each_online_cpu(i) {
+		if (i == cpu)
+			continue;
+		t = &per_cpu(show_regs_state, i).timer;
+		if (timer_pending(t))
+			continue;
+		/* this may race with timer function, but we don't care */
+		t->expires = jiffies;
+		add_timer_on(t, i);
+	}
+	spin_unlock_irqrestore(&show_regs_lock, flags);
+}
+
+static int show_regs_init(void)
+{
+	int i;
+	struct timer_list *t;
+
+	for_each_cpu(i) {
+		t = &per_cpu(show_regs_state, i).timer;
+		init_timer(t);
+		t->function = &show_regs_other;
+		t->data = i;
+	}
+	return 0;
+}
+
+postcore_initcall(show_regs_init);
+
+#endif
+
 static void sysrq_handle_showregs(int key, struct pt_regs *pt_regs,
 				  struct tty_struct *tty) 
 {
+	preempt_disable();
 	if (pt_regs)
 		show_regs(pt_regs);
+	else
+		show_regs_noregs();
+#ifdef CONFIG_SMP
+	if (num_online_cpus() > 1)
+		schedule_show_regs();
+#endif
+	preempt_enable();
 }
 static struct sysrq_key_op sysrq_showregs_op = {
 	.handler	= sysrq_handle_showregs,
@@ -178,6 +592,7 @@ static void sysrq_handle_showmem(int key
 				 struct tty_struct *tty) 
 {
 	show_mem();
+	show_slab_info();
 }
 static struct sysrq_key_op sysrq_showmem_op = {
 	.handler	= sysrq_handle_showmem,
@@ -216,7 +631,7 @@ static void send_sig_all(int sig)
 {
 	struct task_struct *p;
 
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (p->mm && p->pid != 1)
 			/* Not swapper, init nor kernel thread */
 			force_sig(sig, p);
@@ -247,13 +662,40 @@ static struct sysrq_key_op sysrq_kill_op
 	.action_msg	= "Kill All Tasks",
 };
 
+#ifdef CONFIG_SCHED_VCPU
+static void sysrq_handle_vschedstate(int key, struct pt_regs *pt_regs,
+				   struct tty_struct *tty) 
+{
+	show_vsched();
+}
+static struct sysrq_key_op sysrq_vschedstate_op = {
+	.handler	= sysrq_handle_vschedstate,
+	.help_msg	= "showvsched(A)",
+	.action_msg	= "show_vsched(A)",
+};
+#endif
+
+#ifdef CONFIG_STOP_MACHINE
+static void sysrq_handle_stopmachine(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	stop_machine_show_state();
+}
+
+static struct sysrq_key_op sysrq_stopmachine_op = {
+	.handler	= sysrq_handle_stopmachine,
+	.help_msg	= "smachiNe",
+	.action_msg	= "Show stop machine state",
+};
+#endif
+
 /* END SIGNAL SYSRQ HANDLERS BLOCK */
 
 
 /* Key Operations table and lock */
 static spinlock_t sysrq_key_table_lock = SPIN_LOCK_UNLOCKED;
 #define SYSRQ_KEY_TABLE_LENGTH 36
-static struct sysrq_key_op *sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
+static struct sysrq_key_op *def_sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
 /* 0 */	&sysrq_loglevel_op,
 /* 1 */	&sysrq_loglevel_op,
 /* 2 */	&sysrq_loglevel_op,
@@ -264,12 +706,20 @@ static struct sysrq_key_op *sysrq_key_ta
 /* 7 */	&sysrq_loglevel_op,
 /* 8 */	&sysrq_loglevel_op,
 /* 9 */	&sysrq_loglevel_op,
+#ifdef CONFIG_SCHED_VCPU
+/* a */ &sysrq_vschedstate_op,
+#else
 /* a */	NULL, /* Don't use for system provided sysrqs,
 		 it is handled specially on the sparc
 		 and will never arrive */
+#endif
 /* b */	&sysrq_reboot_op,
 /* c */ &sysrq_crash_op,
+#ifdef CONFIG_SYSRQ_DEBUG
+/* d */	&sysrq_debug_enter,
+#else
 /* d */	NULL,
+#endif
 /* e */	&sysrq_term_op,
 /* f */	NULL,
 /* g */	NULL,
@@ -283,7 +733,11 @@ static struct sysrq_key_op *sysrq_key_ta
 #endif
 /* l */	NULL,
 /* m */	&sysrq_showmem_op,
+#ifdef CONFIG_STOP_MACHINE
+/* n */ &sysrq_stopmachine_op,
+#else
 /* n */	NULL,
+#endif
 /* o */	NULL, /* This will often be registered
 		 as 'Off' at init time */
 /* p */	&sysrq_showregs_op,
@@ -303,6 +757,29 @@ static struct sysrq_key_op *sysrq_key_ta
 /* z */	NULL
 };
 
+#ifdef CONFIG_SYSRQ_DEBUG
+static struct sysrq_key_op *debug_sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
+	[13] = &sysrq_debug_dumpmem,	/* d */
+	[26] = &sysrq_debug_quit,	/* q */
+	[27] = &sysrq_debug_resolve,	/* r */
+	[32] = &sysrq_debug_writemem,	/* w */
+	[33] = &sysrq_debug_dumpnext,	/* x */
+};
+
+static struct sysrq_key_op **sysrq_key_table = def_sysrq_key_table;
+
+/* call swap_opts(NULL) to restore opts to defaults */
+static void swap_opts(struct sysrq_key_op **swap_to)
+{
+	if (swap_to)
+		sysrq_key_table = swap_to;
+	else
+		sysrq_key_table = def_sysrq_key_table;
+}
+#else
+#define sysrq_key_table	def_sysrq_key_table
+#endif
+
 /* key2index calculation, -1 on invalid index */
 static int sysrq_key_table_key2index(int key) {
 	int retval;
@@ -392,6 +869,12 @@ void handle_sysrq(int key, struct pt_reg
 {
 	if (!sysrq_enabled)
 		return;
+#ifdef CONFIG_SYSRQ_DEBUG
+	if (sysrq_debug_mode) {
+		__handle_debug(key, pt_regs, tty);
+		return;
+	}
+#endif
 	__handle_sysrq(key, pt_regs, tty);
 }
 
diff -Nurap linux-2.6.9-100.orig/drivers/char/tty_io.c linux-2.6.9-ve023stab054/drivers/char/tty_io.c
--- linux-2.6.9-100.orig/drivers/char/tty_io.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/char/tty_io.c	2011-06-15 19:26:20.000000000 +0400
@@ -86,6 +86,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/poll.h>
+#include <linux/ve_owner.h>
 #include <linux/proc_fs.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -104,6 +105,7 @@
 #include <linux/devfs_fs_kernel.h>
 
 #include <linux/kmod.h>
+#include <ub/ub_mem.h>
 
 #undef TTY_DEBUG_HANGUP
 
@@ -121,11 +123,16 @@ struct termios tty_std_termios = {	/* fo
 
 EXPORT_SYMBOL(tty_std_termios);
 
+/* this lock protects the tty_drivers list; these pretty guys do no locking */
+rwlock_t tty_driver_guard = RW_LOCK_UNLOCKED;
+EXPORT_SYMBOL(tty_driver_guard);
+
 /* This list gets poked at by procfs and various bits of boot up code. This
    could do with some rationalisation such as pulling the tty proc function
    into this file */
    
 LIST_HEAD(tty_drivers);			/* linked list of tty drivers */
+EXPORT_SYMBOL(tty_drivers);
 
 /* Semaphore to protect creating and releasing a tty. This is shared with
    vt.c for deeply disgusting hack reasons */
@@ -135,6 +142,13 @@ DECLARE_MUTEX(tty_sem);
 extern struct tty_driver *ptm_driver;	/* Unix98 pty masters; for /dev/ptmx */
 extern int pty_limit;		/* Config limit on Unix98 ptys */
 static DEFINE_IDR(allocated_ptys);
+#ifdef CONFIG_VE
+#define ve_allocated_ptys	(*(get_exec_env()->allocated_ptys))
+#define ve_ptm_driver		(get_exec_env()->ptm_driver)
+#else
+#define ve_allocated_ptys	allocated_ptys
+#define ve_ptm_driver		ptm_driver
+#endif
 static DECLARE_MUTEX(allocated_ptys_lock);
 #endif
 
@@ -156,11 +170,25 @@ extern void rs_360_init(void);
 static void release_mem(struct tty_struct *tty, int idx);
 
 
+DCL_VE_OWNER(TTYDRV, TAIL_SOFT, struct tty_driver, owner_env, , ())
+DCL_VE_OWNER(TTY, TAIL_SOFT, struct tty_struct, owner_env, , ())
+
+void prepare_tty(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->allocated_ptys = &allocated_ptys;
+	/*
+	 * in this case, tty_register_driver() setups
+	 * owner_env correctly right from the bootup
+	 */
+#endif
+}
+
 static struct tty_struct *alloc_tty_struct(void)
 {
 	struct tty_struct *tty;
 
-	tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
+	tty = ub_kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
 	if (tty)
 		memset(tty, 0, sizeof(struct tty_struct));
 	return tty;
@@ -586,14 +614,37 @@ struct tty_driver *get_tty_driver(dev_t 
 {
 	struct tty_driver *p;
 
+	read_lock(&tty_driver_guard);
 	list_for_each_entry(p, &tty_drivers, tty_drivers) {
 		dev_t base = MKDEV(p->major, p->minor_start);
 		if (device < base || device >= base + p->num)
 			continue;
 		*index = device - base;
-		return p;
+#ifdef CONFIG_VE
+		if (in_interrupt())
+			goto found;
+		if (p->major!=PTY_MASTER_MAJOR && p->major!=PTY_SLAVE_MAJOR
+#ifdef CONFIG_UNIX98_PTYS
+		    && (p->major<UNIX98_PTY_MASTER_MAJOR ||
+		    	p->major>UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) &&
+		       (p->major<UNIX98_PTY_SLAVE_MAJOR ||
+		        p->major>UNIX98_PTY_SLAVE_MAJOR+UNIX98_PTY_MAJOR_COUNT-1)
+#endif
+		) goto found;
+		if (ve_is_super(VE_OWNER_TTYDRV(p)) &&
+		    ve_is_super(get_exec_env()))
+			goto found;
+		if (!ve_accessible_strict(VE_OWNER_TTYDRV(p), get_exec_env()))
+			continue;
+#endif
+		goto found;
 	}
+	read_unlock(&tty_driver_guard);
 	return NULL;
+
+found:
+	read_unlock(&tty_driver_guard);
+	return p;
 }
 
 /*
@@ -822,7 +873,7 @@ void do_tty_hangup(void *data)
 	
 	read_lock(&tasklist_lock);
 	if (tty->session > 0) {
-		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+		do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
 			if (p->signal->tty == tty)
 				p->signal->tty = NULL;
 			if (!p->signal->leader)
@@ -831,7 +882,7 @@ void do_tty_hangup(void *data)
 			send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p);
 			if (tty->pgrp > 0)
 				p->signal->tty_old_pgrp = tty->pgrp;
-		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+		} while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
 	}
 	read_unlock(&tasklist_lock);
 
@@ -948,9 +999,9 @@ void disassociate_ctty(int on_exit)
 
 	/* Now clear signal->tty under the lock */
 	read_lock(&tasklist_lock);
-	do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
+	do_each_task_pid_all(current->signal->session, PIDTYPE_SID, p) {
 		p->signal->tty = NULL;
-	} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
+	} while_each_task_pid_all(current->signal->session, PIDTYPE_SID, p);
 	read_unlock(&tasklist_lock);
 	up(&tty_sem);
 	unlock_kernel();
@@ -1169,25 +1220,32 @@ static inline void tty_line_name(struct 
  * really quite straightforward.  The semaphore locking can probably be
  * relaxed for the (most common) case of reopening a tty.
  */
-static int init_dev(struct tty_driver *driver, int idx,
-	struct tty_struct **ret_tty)
+static int init_dev(struct tty_driver *driver, int idx, 
+	struct tty_struct *i_tty, struct tty_struct **ret_tty)
 {
 	struct tty_struct *tty, *o_tty;
 	struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
 	struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
+	struct ve_struct * owner;
 	int retval=0;
 
-	/* check whether we're reopening an existing tty */
-	if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
-		tty = devpts_get_tty(idx);
-		if (!tty && driver->subtype == PTY_TYPE_SLAVE) {
-			retval = -EIO;
-			goto end_init;
+	owner = VE_OWNER_TTYDRV(driver);
+
+	if (i_tty)
+		tty = i_tty;
+	else {
+		/* check whether we're reopening an existing tty */
+		if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+			tty = devpts_get_tty(idx);
+			if (!tty && driver->subtype == PTY_TYPE_SLAVE) {
+				retval = -EIO;
+				goto end_init;
+			}
+			if (tty && driver->subtype == PTY_TYPE_MASTER)
+				tty = tty->link;
+		} else {
+			tty = driver->ttys[idx];
 		}
-		if (tty && driver->subtype == PTY_TYPE_MASTER)
-			tty = tty->link;
-	} else {
-		tty = driver->ttys[idx];
 	}
 	if (tty) goto fast_track;
 
@@ -1215,6 +1273,7 @@ static int init_dev(struct tty_driver *d
 	tty->driver = driver;
 	tty->index = idx;
 	tty_line_name(driver, idx, tty->name);
+	SET_VE_OWNER_TTY(tty, owner);
 
 	if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
 		tp_loc = &tty->termios;
@@ -1225,7 +1284,7 @@ static int init_dev(struct tty_driver *d
 	}
 
 	if (!*tp_loc) {
-		tp = (struct termios *) kmalloc(sizeof(struct termios),
+		tp = (struct termios *) ub_kmalloc(sizeof(struct termios),
 						GFP_KERNEL);
 		if (!tp)
 			goto free_mem_out;
@@ -1233,7 +1292,7 @@ static int init_dev(struct tty_driver *d
 	}
 
 	if (!*ltp_loc) {
-		ltp = (struct termios *) kmalloc(sizeof(struct termios),
+		ltp = (struct termios *) ub_kmalloc(sizeof(struct termios),
 						 GFP_KERNEL);
 		if (!ltp)
 			goto free_mem_out;
@@ -1248,6 +1307,7 @@ static int init_dev(struct tty_driver *d
 		o_tty->driver = driver->other;
 		o_tty->index = idx;
 		tty_line_name(driver->other, idx, o_tty->name);
+		SET_VE_OWNER_TTY(o_tty, owner);
 
 		if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
 			o_tp_loc = &o_tty->termios;
@@ -1259,7 +1319,7 @@ static int init_dev(struct tty_driver *d
 
 		if (!*o_tp_loc) {
 			o_tp = (struct termios *)
-				kmalloc(sizeof(struct termios), GFP_KERNEL);
+				ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
 			if (!o_tp)
 				goto free_mem_out;
 			*o_tp = driver->other->init_termios;
@@ -1267,7 +1327,7 @@ static int init_dev(struct tty_driver *d
 
 		if (!*o_ltp_loc) {
 			o_ltp = (struct termios *)
-				kmalloc(sizeof(struct termios), GFP_KERNEL);
+				ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
 			if (!o_ltp)
 				goto free_mem_out;
 			memset(o_ltp, 0, sizeof(struct termios));
@@ -1285,6 +1345,10 @@ static int init_dev(struct tty_driver *d
 			*o_ltp_loc = o_ltp;
 		o_tty->termios = *o_tp_loc;
 		o_tty->termios_locked = *o_ltp_loc;
+#ifdef CONFIG_VE
+		if (driver->other->refcount == 0)
+			(void)get_ve(owner);
+#endif
 		driver->other->refcount++;
 		if (driver->subtype == PTY_TYPE_MASTER)
 			o_tty->count++;
@@ -1309,6 +1373,10 @@ static int init_dev(struct tty_driver *d
 		*ltp_loc = ltp;
 	tty->termios = *tp_loc;
 	tty->termios_locked = *ltp_loc;
+#ifdef CONFIG_VE
+	if (driver->refcount == 0)
+		(void)get_ve(owner);
+#endif
 	driver->refcount++;
 	tty->count++;
 
@@ -1422,6 +1490,10 @@ static void release_mem(struct tty_struc
 		}
 		o_tty->magic = 0;
 		o_tty->driver->refcount--;
+#ifdef CONFIG_VE
+		if (o_tty->driver->refcount == 0)
+			put_ve(VE_OWNER_TTY(o_tty));
+#endif
 		file_list_lock();
 		list_del_init(&o_tty->tty_files);
 		file_list_unlock();
@@ -1444,6 +1516,10 @@ static void release_mem(struct tty_struc
 
 	tty->magic = 0;
 	tty->driver->refcount--;
+#ifdef CONFIG_VE
+	if (tty->driver->refcount == 0)
+		put_ve(VE_OWNER_TTY(tty));
+#endif
 	file_list_lock();
 	list_del_init(&tty->tty_files);
 	file_list_unlock();
@@ -1467,6 +1543,9 @@ static void release_dev(struct file * fi
 	int	idx;
 	char	buf[64];
 	unsigned long flags;
+#ifdef CONFIG_UNIX98_PTYS
+	struct idr *idr_alloced;
+#endif
 	
 	tty = (struct tty_struct *)filp->private_data;
 	if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "release_dev"))
@@ -1482,6 +1561,9 @@ static void release_dev(struct file * fi
 	devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
 	devpts_master = pty_master && devpts;
 	o_tty = tty->link;
+#ifdef CONFIG_UNIX98_PTYS
+	idr_alloced = tty->owner_env->allocated_ptys;
+#endif
 
 #ifdef TTY_PARANOIA_CHECK
 	if (idx < 0 || idx >= tty->driver->num) {
@@ -1652,13 +1734,13 @@ static void release_dev(struct file * fi
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+		do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
 			p->signal->tty = NULL;
-		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+		} while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
 		if (o_tty)
-			do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
+			do_each_task_pid_all(o_tty->session, PIDTYPE_SID, p) {
 				p->signal->tty = NULL;
-			} while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
+			} while_each_task_pid_all(o_tty->session, PIDTYPE_SID, p);
 		read_unlock(&tasklist_lock);
 	}
 
@@ -1732,7 +1814,7 @@ static void release_dev(struct file * fi
 	/* Make this pty number available for reallocation */
 	if (devpts) {
 		down(&allocated_ptys_lock);
-		idr_remove(&allocated_ptys, idx);
+		idr_remove(idr_alloced, idx);
 		up(&allocated_ptys_lock);
 	}
 #endif
@@ -1753,7 +1835,7 @@ static void release_dev(struct file * fi
  */
 static int tty_open(struct inode * inode, struct file * filp)
 {
-	struct tty_struct *tty;
+	struct tty_struct *tty, *c_tty;
 	int noctty, retval;
 	struct tty_driver *driver;
 	int index;
@@ -1766,6 +1848,7 @@ retry_open:
 	noctty = filp->f_flags & O_NOCTTY;
 	index  = -1;
 	retval = 0;
+	c_tty = NULL;
 	
 	down(&tty_sem);
 
@@ -1776,6 +1859,7 @@ retry_open:
 		}
 		driver = current->signal->tty->driver;
 		index = current->signal->tty->index;
+		c_tty = current->signal->tty;
 		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
 		/* noctty = 1; */
 		goto got_driver;
@@ -1784,6 +1868,12 @@ retry_open:
 	if (device == MKDEV(TTY_MAJOR,0)) {
 		extern int fg_console;
 		extern struct tty_driver *console_driver;
+#ifdef CONFIG_VE
+		if (!ve_is_super(get_exec_env())) {
+			up(&tty_sem);
+			return -ENODEV;
+		}
+#endif
 		driver = console_driver;
 		index = fg_console;
 		noctty = 1;
@@ -1791,6 +1881,12 @@ retry_open:
 	}
 #endif
 	if (device == MKDEV(TTYAUX_MAJOR,1)) {
+#ifdef CONFIG_VE
+		if (!ve_is_super(get_exec_env())) {
+			up(&tty_sem);
+			return -ENODEV;
+		}
+#endif
 		driver = console_device(&index);
 		if (driver) {
 			/* Don't let /dev/console block */
@@ -1808,7 +1904,7 @@ retry_open:
 		return -ENODEV;
 	}
 got_driver:
-	retval = init_dev(driver, index, &tty);
+	retval = init_dev(driver, index, c_tty, &tty);
 	up(&tty_sem);
 	if (retval) 
 		return retval;
@@ -1877,11 +1973,11 @@ static int ptmx_open(struct inode * inod
 
 	/* find a device that is not in use. */
 	down(&allocated_ptys_lock);
-	if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
+	if (!idr_pre_get(&ve_allocated_ptys, GFP_KERNEL)) {
 		up(&allocated_ptys_lock);
 		return -ENOMEM;
 	}
-	idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
+	idr_ret = idr_get_new(&ve_allocated_ptys, NULL, &index);
 	if (idr_ret < 0) {
 		up(&allocated_ptys_lock);
 		if (idr_ret == -EAGAIN)
@@ -1889,14 +1985,14 @@ static int ptmx_open(struct inode * inod
 		return -EIO;
 	}
 	if (index >= pty_limit) {
-		idr_remove(&allocated_ptys, index);
+		idr_remove(&ve_allocated_ptys, index);
 		up(&allocated_ptys_lock);
 		return -EIO;
 	}
 	up(&allocated_ptys_lock);
 
 	down(&tty_sem);
-	retval = init_dev(ptm_driver, index, &tty);
+	retval = init_dev(ve_ptm_driver, index, NULL, &tty);
 	up(&tty_sem);
 
 	if (retval)
@@ -1919,7 +2015,7 @@ out1:
 	return retval;
 out:
 	down(&allocated_ptys_lock);
-	idr_remove(&allocated_ptys, index);
+	idr_remove(&ve_allocated_ptys, index);
 	up(&allocated_ptys_lock);
 	return retval;
 }
@@ -2033,6 +2129,8 @@ static int tioccons(struct file *file)
 {
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
+	if (!ve_is_super(get_exec_env()))
+		return -EACCES;
 	if (file->f_op->write == redirected_tty_write) {
 		struct file *f;
 		spin_lock(&redirect_lock);
@@ -2093,9 +2191,9 @@ static int tiocsctty(struct tty_struct *
 			 */
 
 			read_lock(&tasklist_lock);
-			do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+			do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
 				p->signal->tty = NULL;
-			} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+			} while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
 			read_unlock(&tasklist_lock);
 		} else
 			return -EPERM;
@@ -2117,7 +2215,7 @@ static int tiocgpgrp(struct tty_struct *
 	 */
 	if (tty == real_tty && current->signal->tty != real_tty)
 		return -ENOTTY;
-	return put_user(real_tty->pgrp, p);
+	return put_user(pid_type_to_vpid(PIDTYPE_PGID, real_tty->pgrp), p);
 }
 
 static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -2137,6 +2235,9 @@ static int tiocspgrp(struct tty_struct *
 		return -EFAULT;
 	if (pgrp < 0)
 		return -EINVAL;
+	pgrp = vpid_to_pid(pgrp);
+	if (pgrp < 0)
+		return -EPERM;
 	if (session_of_pgrp(pgrp) != current->signal->session)
 		return -EPERM;
 	real_tty->pgrp = pgrp;
@@ -2153,7 +2254,7 @@ static int tiocgsid(struct tty_struct *t
 		return -ENOTTY;
 	if (real_tty->session <= 0)
 		return -ENOTTY;
-	return put_user(real_tty->session, p);
+	return put_user(pid_type_to_vpid(PIDTYPE_SID, real_tty->session), p);
 }
 
 static int tiocsetd(struct tty_struct *tty, int __user *p)
@@ -2429,7 +2530,7 @@ static void __do_SAK(void *arg)
 		tty->driver->flush_buffer(tty);
 	
 	read_lock(&tasklist_lock);
-	do_each_task_pid(session, PIDTYPE_SID, p) {
+	do_each_task_pid_all(session, PIDTYPE_SID, p) {
 		if (p->signal->tty == tty || session > 0) {
 			printk(KERN_NOTICE "SAK: killed process %d"
 			    " (%s): p->signal->session==tty->session\n",
@@ -2456,7 +2557,7 @@ static void __do_SAK(void *arg)
 			spin_unlock(&p->files->file_lock);
 		}
 		task_unlock(p);
-	} while_each_task_pid(session, PIDTYPE_SID, p);
+	} while_each_task_pid_all(session, PIDTYPE_SID, p);
 	read_unlock(&tasklist_lock);
 #endif
 }
@@ -2839,8 +2940,11 @@ int tty_register_driver(struct tty_drive
 
 	if (!driver->put_char)
 		driver->put_char = tty_default_put_char;
-	
+
+	SET_VE_OWNER_TTYDRV(driver, get_exec_env());
+	write_lock_irq(&tty_driver_guard);
 	list_add(&driver->tty_drivers, &tty_drivers);
+	write_unlock_irq(&tty_driver_guard);
 	
 	if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) {
 		for(i = 0; i < driver->num; i++)
@@ -2867,7 +2971,9 @@ int tty_unregister_driver(struct tty_dri
 	unregister_chrdev_region(MKDEV(driver->major, driver->minor_start),
 				driver->num);
 
+	write_lock_irq(&tty_driver_guard);
 	list_del(&driver->tty_drivers);
+	write_unlock_irq(&tty_driver_guard);
 
 	/*
 	 * Free the termios and termios_locked structures because
@@ -2995,6 +3101,44 @@ static int __init tty_init(void)
 
 	vty_init();
 #endif
+	prepare_tty();
 	return 0;
 }
 module_init(tty_init);
+
+#ifdef CONFIG_UNIX98_PTYS
+struct class_simple *init_ve_tty_class(void)
+{
+	struct class_simple *ve_tty_class;
+	struct class_device *ve_ptmx_dev_class;
+
+	ve_tty_class = class_simple_create(THIS_MODULE, "tty");
+	if (IS_ERR(ve_tty_class))
+		return ve_tty_class;
+
+	ve_ptmx_dev_class = class_simple_device_add(ve_tty_class,
+		MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx");
+	if (IS_ERR(ve_ptmx_dev_class)) {
+		class_simple_destroy(ve_tty_class);
+		return (struct class_simple *)ve_ptmx_dev_class;
+	}
+
+	return ve_tty_class;
+}
+
+void fini_ve_tty_class(struct class_simple *ve_tty_class)
+{
+	class_simple_device_remove(MKDEV(TTYAUX_MAJOR, 2));
+	class_simple_destroy(ve_tty_class);
+}
+#else
+struct class_simple *init_ve_tty_class(void)
+{
+return NULL;
+}
+void fini_ve_tty_class(struct class_simple *ve_tty_class)
+{
+}
+#endif
+EXPORT_SYMBOL(init_ve_tty_class);
+EXPORT_SYMBOL(fini_ve_tty_class);
diff -Nurap linux-2.6.9-100.orig/drivers/char/vt.c linux-2.6.9-ve023stab054/drivers/char/vt.c
--- linux-2.6.9-100.orig/drivers/char/vt.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/char/vt.c	2011-06-15 19:26:22.000000000 +0400
@@ -2262,8 +2262,10 @@ void vt_console_print(struct console *co
 	}
 	set_cursor(currcons);
 
-	if (!oops_in_progress)
-		poke_blanked_console();
+	if (!oops_in_progress) {
+		if (!printk_no_wake)
+			poke_blanked_console();
+	}
 
 quit:
 	clear_bit(0, &printing);
diff -Nurap linux-2.6.9-100.orig/drivers/firmware/efivars.c linux-2.6.9-ve023stab054/drivers/firmware/efivars.c
--- linux-2.6.9-100.orig/drivers/firmware/efivars.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/firmware/efivars.c	2011-06-15 19:26:22.000000000 +0400
@@ -631,7 +631,7 @@ efivar_create_sysfs_entry(unsigned long 
 	*(short_name + strlen(short_name)) = '-';
 	efi_guid_unparse(vendor_guid, short_name + strlen(short_name));
 
-	kobject_set_name(&new_efivar->kobj, short_name);
+	kobject_set_name(&new_efivar->kobj, "%s", short_name);
 	kobj_set_kset_s(new_efivar, vars_subsys);
 	kobject_register(&new_efivar->kobj);
 
diff -Nurap linux-2.6.9-100.orig/drivers/ide/pci/cmd64x.c linux-2.6.9-ve023stab054/drivers/ide/pci/cmd64x.c
--- linux-2.6.9-100.orig/drivers/ide/pci/cmd64x.c	2004-10-19 01:55:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/ide/pci/cmd64x.c	2011-06-15 19:26:18.000000000 +0400
@@ -579,7 +579,7 @@ static unsigned int __devinit init_chips
 
 #ifdef __i386__
 	if (dev->resource[PCI_ROM_RESOURCE].start) {
-		pci_write_config_byte(dev, PCI_ROM_ADDRESS, dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE);
+		pci_write_config_dword(dev, PCI_ROM_ADDRESS, dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE);
 		printk(KERN_INFO "%s: ROM enabled at 0x%08lx\n", name, dev->resource[PCI_ROM_RESOURCE].start);
 	}
 #endif
diff -Nurap linux-2.6.9-100.orig/drivers/ide/pci/hpt34x.c linux-2.6.9-ve023stab054/drivers/ide/pci/hpt34x.c
--- linux-2.6.9-100.orig/drivers/ide/pci/hpt34x.c	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/ide/pci/hpt34x.c	2011-06-15 19:26:18.000000000 +0400
@@ -192,7 +192,7 @@ static unsigned int __devinit init_chips
 
 	if (cmd & PCI_COMMAND_MEMORY) {
 		if (pci_resource_start(dev, PCI_ROM_RESOURCE)) {
-			pci_write_config_byte(dev, PCI_ROM_ADDRESS,
+			pci_write_config_dword(dev, PCI_ROM_ADDRESS,
 				dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE);
 			printk(KERN_INFO "HPT345: ROM enabled at 0x%08lx\n",
 				dev->resource[PCI_ROM_RESOURCE].start);
diff -Nurap linux-2.6.9-100.orig/drivers/ide/pci/hpt366.c linux-2.6.9-ve023stab054/drivers/ide/pci/hpt366.c
--- linux-2.6.9-100.orig/drivers/ide/pci/hpt366.c	2011-06-09 19:22:37.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/ide/pci/hpt366.c	2011-06-15 19:26:18.000000000 +0400
@@ -1106,7 +1106,7 @@ static unsigned int __devinit init_chips
 	u8 test = 0;
 
 	if (dev->resource[PCI_ROM_RESOURCE].start)
-		pci_write_config_byte(dev, PCI_ROM_ADDRESS,
+		pci_write_config_dword(dev, PCI_ROM_ADDRESS,
 			dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE);
 
 	pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &test);
diff -Nurap linux-2.6.9-100.orig/drivers/ieee1394/ieee1394_core.c linux-2.6.9-ve023stab054/drivers/ieee1394/ieee1394_core.c
--- linux-2.6.9-100.orig/drivers/ieee1394/ieee1394_core.c	2004-10-19 01:53:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/ieee1394/ieee1394_core.c	2011-06-15 19:26:18.000000000 +0400
@@ -1034,8 +1034,8 @@ static int hpsbpkt_thread(void *__hi)
 		if (khpsbpkt_kill)
 			break;
 
-		if (current->flags & PF_FREEZE) {
-			refrigerator(0);
+		if (test_thread_flag(TIF_FREEZE)) {
+			refrigerator();
 			continue;
 		}
 
diff -Nurap linux-2.6.9-100.orig/drivers/ieee1394/nodemgr.c linux-2.6.9-ve023stab054/drivers/ieee1394/nodemgr.c
--- linux-2.6.9-100.orig/drivers/ieee1394/nodemgr.c	2004-10-19 01:54:37.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/ieee1394/nodemgr.c	2011-06-15 19:26:18.000000000 +0400
@@ -1481,8 +1481,8 @@ static int nodemgr_host_thread(void *__h
 
 		if (down_interruptible(&hi->reset_sem) ||
 		    down_interruptible(&nodemgr_serialize)) {
-			if (current->flags & PF_FREEZE) {
-				refrigerator(0);
+			if (test_thread_flag(TIF_FREEZE)) {
+				refrigerator();
 				continue;
 			}
 			printk("NodeMgr: received unexpected signal?!\n" );
diff -Nurap linux-2.6.9-100.orig/drivers/input/input.c linux-2.6.9-ve023stab054/drivers/input/input.c
--- linux-2.6.9-100.orig/drivers/input/input.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/input/input.c	2011-06-15 19:26:22.000000000 +0400
@@ -20,7 +20,7 @@
 #include <linux/major.h>
 #include <linux/pm.h>
 #include <linux/proc_fs.h>
-#include <linux/kmod.h>
+#include <linux/kobject_uevent.h>
 #include <linux/interrupt.h>
 #include <linux/poll.h>
 #include <linux/device.h>
diff -Nurap linux-2.6.9-100.orig/drivers/input/serio/serio.c linux-2.6.9-ve023stab054/drivers/input/serio/serio.c
--- linux-2.6.9-100.orig/drivers/input/serio/serio.c	2004-10-19 01:53:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/input/serio/serio.c	2011-06-15 19:26:22.000000000 +0400
@@ -225,8 +225,8 @@ static int serio_thread(void *nothing)
 	do {
 		serio_handle_events();
 		wait_event_interruptible(serio_wait, !list_empty(&serio_event_list));
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
 	} while (!signal_pending(current));
 
 	printk(KERN_DEBUG "serio: kseriod exiting\n");
@@ -246,11 +246,6 @@ static ssize_t serio_show_description(st
 	return sprintf(buf, "%s\n", serio->name);
 }
 
-static ssize_t serio_show_driver(struct device *dev, char *buf)
-{
-	return sprintf(buf, "%s\n", dev->driver ? dev->driver->name : "(none)");
-}
-
 static ssize_t serio_rebind_driver(struct device *dev, const char *buf, size_t count)
 {
 	struct serio *serio = to_serio_port(dev);
@@ -307,7 +302,7 @@ static ssize_t serio_set_bind_mode(struc
 
 static struct device_attribute serio_device_attrs[] = {
 	__ATTR(description, S_IRUGO, serio_show_description, NULL),
-	__ATTR(driver, S_IWUSR | S_IRUGO, serio_show_driver, serio_rebind_driver),
+	__ATTR(drvctl, S_IWUSR, NULL, serio_rebind_driver),
 	__ATTR(bind_mode, S_IWUSR | S_IRUGO, serio_show_bind_mode, serio_set_bind_mode),
 	__ATTR_NULL
 };
diff -Nurap linux-2.6.9-100.orig/drivers/md/md.c linux-2.6.9-ve023stab054/drivers/md/md.c
--- linux-2.6.9-100.orig/drivers/md/md.c	2011-06-09 19:22:53.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/md/md.c	2011-06-15 19:26:18.000000000 +0400
@@ -2844,8 +2844,8 @@ int md_thread(void * arg)
 
 		wait_event_interruptible(thread->wqueue,
 					 test_bit(THREAD_WAKEUP, &thread->flags));
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
 
 		clear_bit(THREAD_WAKEUP, &thread->flags);
 
diff -Nurap linux-2.6.9-100.orig/drivers/net/8139too.c linux-2.6.9-ve023stab054/drivers/net/8139too.c
--- linux-2.6.9-100.orig/drivers/net/8139too.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/net/8139too.c	2011-06-15 19:26:18.000000000 +0400
@@ -1628,8 +1628,8 @@ static int rtl8139_thread (void *data)
 		do {
 			timeout = interruptible_sleep_on_timeout (&tp->thr_wait, timeout);
 			/* make swsusp happy with our thread */
-			if (current->flags & PF_FREEZE)
-				refrigerator(PF_FREEZE);
+			if (test_thread_flag(TIF_FREEZE))
+				refrigerator();
 		} while (!signal_pending (current) && (timeout > 0));
 
 		if (signal_pending (current)) {
diff -Nurap linux-2.6.9-100.orig/drivers/net/ethertap.c linux-2.6.9-ve023stab054/drivers/net/ethertap.c
--- linux-2.6.9-100.orig/drivers/net/ethertap.c	2004-10-19 01:53:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/net/ethertap.c	2011-06-15 19:26:20.000000000 +0400
@@ -347,8 +347,9 @@ int __init ethertap_init(void)
 {
 	int i, err = 0;
 
-	/* netlink can only hande 16 entries unless modified */
-	if (max_taps > MAX_LINKS - NETLINK_TAPBASE)
+	/* netlink can only handle (NETLINK_TAPLAST - NETLINK_TAPBASE + 1) (15) */
+	/* entries unless modified */
+	if (max_taps > NETLINK_TAPLAST - NETLINK_TAPBASE + 1)
 		return -E2BIG;
 
 	tap_map = kmalloc(sizeof(struct net_device *)*max_taps, GFP_KERNEL);
diff -Nurap linux-2.6.9-100.orig/drivers/net/irda/sir_kthread.c linux-2.6.9-ve023stab054/drivers/net/irda/sir_kthread.c
--- linux-2.6.9-100.orig/drivers/net/irda/sir_kthread.c	2004-10-19 01:55:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/net/irda/sir_kthread.c	2011-06-15 19:26:18.000000000 +0400
@@ -136,8 +136,8 @@ static int irda_thread(void *startup)
 		remove_wait_queue(&irda_rq_queue.kick, &wait);
 
 		/* make swsusp happy with our thread */
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
 
 		run_irda_queue();
 	}
diff -Nurap linux-2.6.9-100.orig/drivers/net/irda/stir4200.c linux-2.6.9-ve023stab054/drivers/net/irda/stir4200.c
--- linux-2.6.9-100.orig/drivers/net/irda/stir4200.c	2004-10-19 01:53:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/net/irda/stir4200.c	2011-06-15 19:26:18.000000000 +0400
@@ -767,7 +767,7 @@ static int stir_transmit_thread(void *ar
 	       && !signal_pending(current))
 	{
 		/* if suspending, then power off and wait */
-		if (current->flags & PF_FREEZE) {
+		if (test_thread_flag(TIF_FREEZE)) {
 			if (stir->receiving)
 				receive_stop(stir);
 			else
@@ -775,7 +775,7 @@ static int stir_transmit_thread(void *ar
 
 			write_reg(stir, REG_CTRL1, CTRL1_TXPWD|CTRL1_RXPWD);
 
-			refrigerator(PF_FREEZE);
+			refrigerator();
 
 			if (change_speed(stir, stir->speed))
 				break;
diff -Nurap linux-2.6.9-100.orig/drivers/net/irda/vlsi_ir.h linux-2.6.9-ve023stab054/drivers/net/irda/vlsi_ir.h
--- linux-2.6.9-100.orig/drivers/net/irda/vlsi_ir.h	2004-10-19 01:54:37.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/net/irda/vlsi_ir.h	2011-06-15 19:26:19.000000000 +0400
@@ -58,7 +58,7 @@ typedef void irqreturn_t;
 
 /* PDE() introduced in 2.5.4 */
 #ifdef CONFIG_PROC_FS
-#define PDE(inode) ((inode)->u.generic_ip)
+#define LPDE(inode) ((inode)->u.generic_ip)
 #endif
 
 /* irda crc16 calculation exported in 2.5.42 */
diff -Nurap linux-2.6.9-100.orig/drivers/net/loopback.c linux-2.6.9-ve023stab054/drivers/net/loopback.c
--- linux-2.6.9-100.orig/drivers/net/loopback.c	2004-10-19 01:53:22.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/net/loopback.c	2011-06-15 19:26:20.000000000 +0400
@@ -59,6 +59,13 @@
 #include <linux/percpu.h>
 
 static DEFINE_PER_CPU(struct net_device_stats, loopback_stats);
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define LOOPBACK_STATS(cpu)	(ve_is_super(get_exec_env())) ?	\
+				&per_cpu(loopback_stats, cpu) :	\
+				(&(get_exec_env()->_loopback_stats)[(cpu)])
+#else
+#define LOOPBACK_STATS(cpu)	&per_cpu(loopback_stats, cpu)
+#endif
 
 #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16)
 
@@ -128,6 +135,11 @@ static int loopback_xmit(struct sk_buff 
 {
 	struct net_device_stats *lb_stats;
 
+	if (unlikely(get_exec_env()->disable_net)) {
+		kfree_skb(skb);
+		return 0;
+	}
+
 	skb_orphan(skb);
 
 	skb->protocol=eth_type_trans(skb,dev);
@@ -146,7 +158,7 @@ static int loopback_xmit(struct sk_buff 
 
 	dev->last_rx = jiffies;
 
-	lb_stats = &per_cpu(loopback_stats, get_cpu());
+	lb_stats = LOOPBACK_STATS(get_cpu());
 	lb_stats->rx_bytes += skb->len;
 	lb_stats->tx_bytes += skb->len;
 	lb_stats->rx_packets++;
@@ -174,7 +186,7 @@ static struct net_device_stats *get_stat
 
 		if (!cpu_possible(i)) 
 			continue;
-		lb_stats = &per_cpu(loopback_stats, i);
+		lb_stats = LOOPBACK_STATS(i);
 		stats->rx_bytes   += lb_stats->rx_bytes;
 		stats->tx_bytes   += lb_stats->tx_bytes;
 		stats->rx_packets += lb_stats->rx_packets;
@@ -195,6 +207,30 @@ static struct ethtool_ops loopback_ethto
 	.set_tso		= ethtool_op_set_tso,
 };
 
+static void loopback_destructor(struct net_device *dev)
+{
+	kfree(dev->priv);
+	dev->priv = NULL;
+}
+
+struct net_device templ_loopback_dev = {
+	.name	 		= "lo",
+	.mtu			= (16 * 1024) + 20 + 20 + 12,
+	.hard_start_xmit	= loopback_xmit,
+	.hard_header		= eth_header,
+	.hard_header_cache	= eth_header_cache,
+	.header_cache_update	= eth_header_cache_update,
+	.hard_header_len	= ETH_HLEN,	/* 14	*/
+	.addr_len		= ETH_ALEN,	/* 6	*/
+	.tx_queue_len		= 0,
+	.type			= ARPHRD_LOOPBACK,	/* 0x0001*/
+	.rebuild_header		= eth_rebuild_header,
+	.flags			= IFF_LOOPBACK,
+	.features 		= NETIF_F_SG|NETIF_F_FRAGLIST
+				  |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA
+				  |NETIF_F_LLTX|NETIF_F_VIRTUAL,
+};
+
 struct net_device loopback_dev = {
 	.name	 		= "lo",
 	.mtu			= (16 * 1024) + 20 + 20 + 12,
@@ -225,9 +261,11 @@ int __init loopback_init(void)
 		memset(stats, 0, sizeof(struct net_device_stats));
 		loopback_dev.priv = stats;
 		loopback_dev.get_stats = &get_stats;
+		loopback_dev.destructor = &loopback_destructor;
 	}
 	
 	return register_netdev(&loopback_dev);
 };
 
 EXPORT_SYMBOL(loopback_dev);
+EXPORT_SYMBOL(templ_loopback_dev);
diff -Nurap linux-2.6.9-100.orig/drivers/net/net_init.c linux-2.6.9-ve023stab054/drivers/net/net_init.c
--- linux-2.6.9-100.orig/drivers/net/net_init.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/net/net_init.c	2011-06-15 19:26:19.000000000 +0400
@@ -51,6 +51,7 @@
 #include <linux/if_ltalk.h>
 #include <linux/rtnetlink.h>
 #include <net/neighbour.h>
+#include <ub/ub_mem.h>
 
 /* The network devices currently exist only in the socket namespace, so these
    entries are unused.  The only ones that make sense are
@@ -90,7 +91,7 @@ struct net_device *alloc_netdev(int size
 			& ~NETDEV_ALIGN_CONST;
 	alloc_size += sizeof(struct net_device_wrapper)  + NETDEV_ALIGN_CONST;
 
-	p = kmalloc (alloc_size, GFP_KERNEL);
+	p = ub_kmalloc(alloc_size, GFP_KERNEL);
 	if (!p) {
 		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
 		return NULL;
@@ -406,6 +407,10 @@ int register_netdev(struct net_device *d
 
 out:
 	rtnl_unlock();
+	if (err == 0 && dev->reg_state != NETREG_REGISTERED) {
+		unregister_netdev(dev);
+		err = -ENOMEM;
+	}
 	return err;
 }
 
diff -Nurap linux-2.6.9-100.orig/drivers/net/open_vznet.c linux-2.6.9-ve023stab054/drivers/net/open_vznet.c
--- linux-2.6.9-100.orig/drivers/net/open_vznet.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/drivers/net/open_vznet.c	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,199 @@
+/*
+ *  open_vznet.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * Virtual Networking device used to change VE ownership on packets
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+
+#include <linux/inet.h>
+#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <linux/venet.h>
+
+void veip_stop(struct ve_struct *ve)
+{
+	struct list_head *p, *tmp;
+
+	write_lock_irq(&veip_hash_lock);
+	if (ve->veip == NULL)
+		goto unlock;
+	list_for_each_safe(p, tmp, &ve->veip->ip_lh) {
+		struct ip_entry_struct *ptr;
+		ptr = list_entry(p, struct ip_entry_struct, ve_list);
+		ptr->active_env = NULL;
+		list_del(&ptr->ve_list);
+		list_del(&ptr->ip_hash);
+		kfree(ptr);
+	}
+	veip_put(ve->veip);
+	ve->veip = NULL;
+	if (!ve_is_super(ve))
+		module_put(THIS_MODULE);
+unlock:
+	write_unlock_irq(&veip_hash_lock);
+}
+
+void veip_list_cleanup(struct veip_struct *veip)
+{
+}
+
+int veip_start(struct ve_struct *ve)
+{
+	int err, new_veip;
+
+	err = 0;
+	write_lock_irq(&veip_hash_lock);
+	new_veip = (ve->veip == NULL);
+	ve->veip = veip_findcreate(ve->veid);
+	if (ve->veip == NULL)
+		err = -ENOMEM;
+	write_unlock_irq(&veip_hash_lock);
+	if (err == 0 && new_veip && !ve_is_super(ve))
+		__module_get(THIS_MODULE);
+	return err;
+}
+
+int veip_entry_add(struct ve_struct *ve, struct sockaddr_in *addr)
+{
+	struct ip_entry_struct *entry, *found;
+	int err;
+
+	entry = kmalloc(sizeof(struct ip_entry_struct), GFP_KERNEL);
+	if (entry == NULL)
+		return -ENOMEM;
+
+	memset(entry, 0, sizeof(struct ip_entry_struct));
+	entry->ip = addr->sin_addr.s_addr;
+
+	write_lock_irq(&veip_hash_lock);
+	err = -EADDRINUSE;
+	found = ip_entry_lookup(entry->ip);
+	if (found != NULL)
+		goto out_unlock;
+	else {
+		ip_entry_hash(entry, ve->veip);
+		found = entry;
+		entry = NULL;
+	}
+	err = 0;
+	found->active_env = ve;
+out_unlock:
+	write_unlock_irq(&veip_hash_lock);
+	if (entry != NULL)
+		kfree(entry);
+	return err;
+}
+
+int veip_entry_del(envid_t veid, struct sockaddr_in *addr)
+{
+	struct ip_entry_struct *found;
+	int err;
+
+	err = -EADDRNOTAVAIL;
+	write_lock_irq(&veip_hash_lock);
+	found = ip_entry_lookup(addr->sin_addr.s_addr);
+	if (found == NULL)
+		goto out;
+	if (found->active_env->veid != veid)
+		goto out;
+
+	err = 0;
+	found->active_env = NULL;
+
+	list_del(&found->ip_hash);
+	list_del(&found->ve_list);
+	kfree(found);
+out:
+	write_unlock_irq(&veip_hash_lock);
+	return err;
+}
+
+static struct ve_struct *venet_find_ve(__u32 ip)
+{
+	struct ip_entry_struct *entry;
+
+	entry = ip_entry_lookup(ip);
+	if (entry == NULL)
+		return NULL;
+
+	return entry->active_env;
+}
+
+int venet_change_skb_owner(struct sk_buff *skb)
+{
+	struct ve_struct *ve, *ve_old;
+	struct iphdr *iph;
+
+	ve_old = skb->owner_env;
+	iph = skb->nh.iph;
+
+	read_lock(&veip_hash_lock);
+	if (!ve_is_super(ve_old)) {
+		/* from VE to host */
+		ve = venet_find_ve(iph->saddr);
+		if (ve == NULL)
+			goto out_drop;
+		if (!ve_accessible_strict(ve, ve_old))
+			goto out_source;
+		skb->owner_env = get_ve0();
+	} else {
+		/* from host to VE */
+		ve = venet_find_ve(iph->daddr);
+		if (ve == NULL)
+			goto out_drop;
+		skb->owner_env = ve;
+	}
+	read_unlock(&veip_hash_lock);
+
+	return 0;
+
+out_drop:
+	read_unlock(&veip_hash_lock);
+	return -ESRCH;
+
+out_source:
+	read_unlock(&veip_hash_lock);
+	if (net_ratelimit()) {
+		printk(KERN_WARNING "Dropped packet, source wrong "
+		       "veid=%u src-IP=%u.%u.%u.%u "
+		       "dst-IP=%u.%u.%u.%u\n",
+		       skb->owner_env->veid,
+		       NIPQUAD(skb->nh.iph->saddr),
+		       NIPQUAD(skb->nh.iph->daddr));
+	}
+	return -EACCES;
+}
+
+#ifdef CONFIG_PROC_FS
+int veip_seq_show(struct seq_file *m, void *v)
+{
+	struct list_head *p;
+	struct ip_entry_struct *entry;
+	char s[16];
+
+	p = (struct list_head *)v;
+	if (p == ip_entry_hash_table) {
+		seq_puts(m, "Version: 2.5\n");
+		return 0;
+	}
+	entry = list_entry(p, struct ip_entry_struct, ip_hash);
+	sprintf(s, "%u.%u.%u.%u", NIPQUAD(entry->ip));
+	seq_printf(m, "%15s %10u\n", s, 0);
+	return 0;
+}
+#endif
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Virtual Network Device");
+MODULE_LICENSE("GPL v2");
diff -Nurap linux-2.6.9-100.orig/drivers/net/tun.c linux-2.6.9-ve023stab054/drivers/net/tun.c
--- linux-2.6.9-100.orig/drivers/net/tun.c	2004-10-19 01:54:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/net/tun.c	2011-06-15 19:26:20.000000000 +0400
@@ -44,6 +44,7 @@
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <ub/beancounter.h>
 
 #ifdef TUN_DEBUG
 static int debug;
@@ -71,6 +72,9 @@ static int tun_net_close(struct net_devi
 static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
+#if 0
+	struct user_beancounter *ub;
+#endif
 
 	DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
 
@@ -90,6 +94,21 @@ static int tun_net_xmit(struct sk_buff *
 		if (skb_queue_len(&tun->readq) >= dev->tx_queue_len)
 			goto drop;
 	}
+
+#if 0
+	ub = netdev_bc(dev)->exec_ub;
+	if (ub && (skb_bc(skb)->charged == 0)) {
+		unsigned long charge;
+		charge = skb_charge_fullsize(skb);
+		if (charge_beancounter(ub, UB_OTHERSOCKBUF, charge, 1))
+			goto drop;
+		get_beancounter(ub);
+		skb_bc(skb)->ub = ub;
+		skb_bc(skb)->charged = charge;
+		skb_bc(skb)->resource = UB_OTHERSOCKBUF;
+	}
+#endif
+
 	skb_queue_tail(&tun->readq, skb);
 
 	/* Notify and wake up reader process */
@@ -174,24 +193,31 @@ static __inline__ ssize_t tun_get_user(s
 {
 	struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
 	struct sk_buff *skb;
-	size_t len = count;
+	size_t len = count, align = 0;
 
 	if (!(tun->flags & TUN_NO_PI)) {
-		if ((len -= sizeof(pi)) > len)
+		if ((len -= sizeof(pi)) > count)
 			return -EINVAL;
 
 		if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
 			return -EFAULT;
 	}
+
+	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV)
+		align = NET_IP_ALIGN;
  
-	if (!(skb = alloc_skb(len + 2, GFP_KERNEL))) {
+	if (!(skb = alloc_skb(len + align, GFP_KERNEL))) {
 		tun->stats.rx_dropped++;
 		return -ENOMEM;
 	}
 
-	skb_reserve(skb, 2);
-	if (memcpy_fromiovec(skb_put(skb, len), iv, len))
+	if (align)
+		skb_reserve(skb, align);
+	if (memcpy_fromiovec(skb_put(skb, len), iv, len)) {
+		tun->stats.rx_dropped++;
+		kfree_skb(skb);
 		return -EFAULT;
+	}
 
 	skb->dev = tun->dev;
 	switch (tun->flags & TUN_TYPE_MASK) {
@@ -322,6 +348,7 @@ static ssize_t tun_chr_readv(struct file
 
 		ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
 
+		/* skb will be uncharged in kfree_skb() */
 		kfree_skb(skb);
 		break;
 	}
@@ -355,6 +382,7 @@ static void tun_setup(struct net_device 
 	dev->stop = tun_net_close;
 	dev->get_stats = tun_net_stats;
 	dev->destructor = free_netdev;
+	dev->features |= NETIF_F_VIRTUAL;
 }
 
 static struct tun_struct *tun_get_by_name(const char *name)
@@ -363,8 +391,9 @@ static struct tun_struct *tun_get_by_nam
 
 	ASSERT_RTNL();
 	list_for_each_entry(tun, &tun_dev_list, list) {
-		if (!strncmp(tun->dev->name, name, IFNAMSIZ))
-		    return tun;
+		if (ve_accessible_strict(tun->dev->owner_env, get_exec_env()) &&
+		    !strncmp(tun->dev->name, name, IFNAMSIZ))
+			return tun;
 	}
 
 	return NULL;
@@ -383,7 +412,8 @@ static int tun_set_iff(struct file *file
 
 		/* Check permissions */
 		if (tun->owner != -1 &&
-		    current->euid != tun->owner && !capable(CAP_NET_ADMIN))
+		    current->euid != tun->owner && 
+		    !capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			return -EPERM;
 	} 
 	else if (__dev_get_by_name(ifr->ifr_name)) 
@@ -496,6 +526,9 @@ static int tun_chr_ioctl(struct inode *i
 		break;
 
 	case TUNSETPERSIST:
+		/* prohibit persist mode inside a VE */
+		if (!ve_is_super(get_exec_env()))
+			return -EPERM;
 		/* Disable/Enable persist mode */
 		if (arg)
 			tun->flags |= TUN_PERSIST;
diff -Nurap linux-2.6.9-100.orig/drivers/net/venet_core.c linux-2.6.9-ve023stab054/drivers/net/venet_core.c
--- linux-2.6.9-100.orig/drivers/net/venet_core.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/drivers/net/venet_core.c	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,706 @@
+/*
+ *  venet_core.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * Common part for Virtuozzo virtual network devices
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/if_ether.h>	/* For the statistics structure. */
+#include <linux/if_arp.h>	/* For ARPHRD_ETHER */
+#include <linux/venet.h>
+#include <linux/ve_proto.h>
+#include <linux/vzctl.h>
+#include <linux/vzctl_venet.h>
+
+struct list_head ip_entry_hash_table[VEIP_HASH_SZ];
+rwlock_t veip_hash_lock = RW_LOCK_UNLOCKED;
+LIST_HEAD(veip_lh);
+
+struct venet_stats {
+	struct net_device_stats	stats;
+	struct net_device_stats	*real_stats;
+};
+
+static inline struct net_device_stats *
+venet_stats(struct net_device *dev, int cpu)
+{
+	struct venet_stats *stats;
+	stats = (struct venet_stats*)dev->priv;
+	return per_cpu_ptr(stats->real_stats, cpu);
+}
+
+
+#define ip_entry_hash_function(ip)  (ntohl(ip) & (VEIP_HASH_SZ - 1))
+
+void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip)
+{
+	list_add(&entry->ip_hash,
+		 ip_entry_hash_table + ip_entry_hash_function(entry->ip));
+	list_add(&entry->ve_list, &veip->ip_lh);
+}
+
+void veip_put(struct veip_struct *veip)
+{
+	if (!list_empty(&veip->ip_lh))
+		return;
+	if (!list_empty(&veip->src_lh))
+		return;
+	if (!list_empty(&veip->dst_lh))
+		return;
+
+	list_del(&veip->list);
+	kfree(veip);
+}
+
+struct ip_entry_struct *ip_entry_lookup(u32 addr)
+{
+	struct ip_entry_struct *entry;
+	struct list_head *tmp;
+
+	list_for_each(tmp, ip_entry_hash_table + ip_entry_hash_function(addr)) {
+		entry = list_entry(tmp, struct ip_entry_struct, ip_hash);
+		if (entry->ip != addr)
+			continue;
+		return entry;
+	}
+	return NULL;
+}
+
+struct veip_struct *veip_find(envid_t veid)
+{
+	struct veip_struct *ptr;
+	list_for_each_entry(ptr, &veip_lh, list) {
+		if (ptr->veid != veid)
+			continue;
+		return ptr;
+	}
+	return NULL;
+}
+
+struct veip_struct *veip_findcreate(envid_t veid)
+{
+	struct veip_struct *ptr;
+
+	ptr = veip_find(veid);
+	if (ptr != NULL)
+		return ptr;
+
+	ptr = kmalloc(sizeof(struct veip_struct), GFP_ATOMIC);
+	if (ptr == NULL)
+		return NULL;
+	memset(ptr, 0, sizeof(struct veip_struct));
+	INIT_LIST_HEAD(&ptr->ip_lh);
+	INIT_LIST_HEAD(&ptr->src_lh);
+	INIT_LIST_HEAD(&ptr->dst_lh);
+	ptr->veid = veid;
+	list_add(&ptr->list, &veip_lh);
+	return ptr;
+}
+
+/*
+ * Device functions
+ */
+
+static int venet_open(struct net_device *dev)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -EBUSY;
+	return 0;
+}
+
+static int venet_close(struct net_device *master)
+{
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+static void venet_destructor(struct net_device *dev)
+{
+	struct venet_stats *stats = (struct venet_stats *)dev->priv;
+	if (stats == NULL)
+		return;
+	free_percpu(stats->real_stats);
+	kfree(stats);
+	dev->priv = NULL;
+}
+
+/*
+ * The higher levels take care of making this non-reentrant (it's
+ * called with bh's disabled).
+ */
+static int venet_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats;
+	struct net_device *rcv = NULL;
+	struct iphdr *iph;
+	int length;
+
+	stats = venet_stats(dev, smp_processor_id());
+
+	if (unlikely(get_exec_env()->disable_net))
+		goto outf;
+
+	/*
+	 *	Optimise so buffers with skb->free=1 are not copied but
+	 *	instead are lobbed from tx queue to rx queue
+	 */
+	if (atomic_read(&skb->users) != 1) {
+	  	struct sk_buff *skb2 = skb;
+	  	skb = skb_clone(skb, GFP_ATOMIC);	/* Clone the buffer */
+	  	if (skb == NULL) {
+			kfree_skb(skb2);
+			goto out;
+		}
+	  	kfree_skb(skb2);
+	} else
+		skb_orphan(skb);
+
+	if (skb->protocol != __constant_htons(ETH_P_IP))
+		goto outf;
+
+	iph = skb->nh.iph;
+	if (MULTICAST(iph->daddr))
+		goto outf;
+
+	if (venet_change_skb_owner(skb) < 0)
+		goto outf;
+
+	if (unlikely(VE_OWNER_SKB(skb)->disable_net))
+		goto outf;
+
+	rcv = VE_OWNER_SKB(skb)->_venet_dev;
+	if (!rcv)
+		/* VE going down */
+		goto outf;
+
+	dev_hold(rcv);
+
+	if (!(rcv->flags & IFF_UP)) {
+		/* Target VE does not want to receive packets */
+		dev_put(rcv);
+		goto outf;
+	}
+
+	skb->pkt_type = PACKET_HOST;
+	skb->dev = rcv;
+
+	skb->mac.raw = skb->data;
+	memset(skb->data - dev->hard_header_len, 0, dev->hard_header_len);
+
+	dst_release(skb->dst);
+	skb->dst = NULL;
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_put(skb->nfct);
+	skb->nfct = NULL;
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 0;
+#endif
+#endif
+	length = skb->len;
+
+	netif_rx(skb);
+
+	stats->tx_bytes += length;
+	stats->tx_packets++;
+	if (rcv) {
+		struct net_device_stats *rcv_stats;
+
+		rcv_stats = venet_stats(rcv, smp_processor_id());
+		rcv_stats->rx_bytes += length;
+		rcv_stats->rx_packets++;
+		dev_put(rcv);
+	}
+
+	return 0;
+
+outf:
+	kfree_skb(skb);
+	stats->tx_dropped++;
+out:
+	return 0;
+}
+
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+	int i;
+	struct venet_stats *stats;
+
+	stats = (struct venet_stats *)dev->priv;
+	memset(&stats->stats, 0, sizeof(struct net_device_stats));
+	for (i=0; i < NR_CPUS; i++) {
+		struct net_device_stats *dev_stats;
+
+		if (!cpu_possible(i)) 
+			continue;
+		dev_stats = venet_stats(dev, i);
+		stats->stats.rx_bytes   += dev_stats->rx_bytes;
+		stats->stats.tx_bytes   += dev_stats->tx_bytes;
+		stats->stats.rx_packets += dev_stats->rx_packets;
+		stats->stats.tx_packets += dev_stats->tx_packets;
+	}
+
+	return &stats->stats;
+}
+
+/* Initialize the rest of the LOOPBACK device. */
+int venet_init_dev(struct net_device *dev)
+{
+	struct venet_stats *stats;
+
+	dev->hard_start_xmit = venet_xmit;
+	stats = kzalloc(sizeof(struct venet_stats), GFP_KERNEL);
+	if (stats == NULL)
+		goto fail;
+	stats->real_stats = alloc_percpu(struct net_device_stats);
+	if (stats->real_stats == NULL)
+		goto fail_free;
+	dev->priv = stats;
+
+	dev->get_stats = get_stats;
+	dev->open = venet_open;
+	dev->stop = venet_close;
+	dev->destructor = venet_destructor;
+
+	/*
+	 *	Fill in the generic fields of the device structure.
+	 */
+	dev->type		= ARPHRD_VOID;
+	dev->hard_header_len 	= ETH_HLEN;
+	dev->mtu		= 1500; /* eth_mtu */
+	dev->tx_queue_len	= 0;
+
+	memset(dev->broadcast, 0xFF, ETH_ALEN);
+
+	/* New-style flags. */
+	dev->flags		= IFF_BROADCAST|IFF_NOARP|IFF_POINTOPOINT;
+	return 0;
+
+fail_free:
+	kfree(stats);
+fail:
+	return -ENOMEM;
+}
+
+static void venet_setup(struct net_device *dev)
+{
+	dev->init = venet_init_dev;
+	/*
+	 * No other features are enabled, because:
+	 *  - checksumming is required, and nobody else will do our job
+	 */
+	dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL | NETIF_F_LLTX |
+		NETIF_F_VLAN_CHALLENGED;
+}
+
+#ifdef CONFIG_PROC_FS
+static int veinfo_seq_show(struct seq_file *m, void *v)
+{
+	struct ve_struct *ve = (struct ve_struct *)v;
+	struct list_head *tmp;
+
+	seq_printf(m, "%10u %5u %5u", ve->veid,
+                                ve->class_id, atomic_read(&ve->pcounter));
+	read_lock(&veip_hash_lock);
+	if (ve->veip == NULL)
+		goto unlock;
+	list_for_each(tmp, &ve->veip->ip_lh) {
+		char ip[16];
+		struct ip_entry_struct *entry;
+
+		entry = list_entry(tmp, struct ip_entry_struct, ve_list);
+		if (entry->active_env == NULL)
+			continue;
+
+		sprintf(ip, "%u.%u.%u.%u", NIPQUAD(entry->ip));
+		seq_printf(m, " %15s", ip);
+	}
+unlock:
+	read_unlock(&veip_hash_lock);
+	seq_putc(m, '\n');
+	return 0;
+}
+
+static void *ve_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct ve_struct *ve, *curve;
+	loff_t l;
+
+	curve = get_exec_env();
+	read_lock(&ve_list_guard);
+	if (!ve_is_super(curve)) {
+		if (*pos != 0)
+			return NULL;
+		return curve;
+	}
+	for (ve = ve_list_head, l = *pos;
+	     ve != NULL && l > 0;
+	     ve = ve->next, l--);
+	return ve;
+}
+
+static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct ve_struct *ve = (struct ve_struct *)v;
+
+	if (!ve_is_super(get_exec_env()))
+		return NULL;
+	(*pos)++;
+	return ve->next;
+}
+
+static void ve_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&ve_list_guard);
+}
+
+
+static struct seq_operations veinfo_seq_op = {
+        start:  ve_seq_start,
+        next:   ve_seq_next,
+        stop:   ve_seq_stop,
+        show:   veinfo_seq_show
+};
+
+static int veinfo_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &veinfo_seq_op);
+}
+
+static struct file_operations proc_veinfo_operations = {
+        open:           veinfo_open,
+        read:           seq_read,
+        llseek:         seq_lseek,
+        release:        seq_release
+};
+
+static void *veip_seq_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t l;
+	struct list_head *p;
+	int i;
+
+	l = *pos;
+	write_lock_irq(&veip_hash_lock);
+	if (l == 0)
+		return ip_entry_hash_table;
+	for (i = 0; i < VEIP_HASH_SZ; i++) {
+		list_for_each(p, ip_entry_hash_table + i) {
+			if (--l == 0)
+				return p;
+		}
+	}
+	return NULL;
+}
+
+static void *veip_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct list_head *p;
+
+	p = (struct list_head *)v;
+	while (1) {
+		p = p->next;
+		if (p < ip_entry_hash_table ||
+		    p >= ip_entry_hash_table + VEIP_HASH_SZ) {
+			(*pos)++;
+			return p;
+		}
+		if (++p >= ip_entry_hash_table + VEIP_HASH_SZ)
+			return NULL;
+	}
+	return NULL;
+}
+
+static void veip_seq_stop(struct seq_file *m, void *v)
+{
+	write_unlock_irq(&veip_hash_lock);
+}
+
+static struct seq_operations veip_seq_op = {
+        start:  veip_seq_start,
+        next:   veip_seq_next,
+        stop:   veip_seq_stop,
+        show:   veip_seq_show
+};
+
+static int veip_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &veip_seq_op);
+}
+
+static struct file_operations proc_veip_operations = {
+        open:           veip_open,
+        read:           seq_read,
+        llseek:         seq_lseek,
+        release:        seq_release
+};
+#endif
+
+int real_ve_ip_map(envid_t veid, int op, struct sockaddr *uservaddr, int addrlen)
+{
+	int err;
+	struct sockaddr_in addr;
+	struct ve_struct *ve;
+
+	err = -EPERM;
+	if (!capable(CAP_SETVEID))
+		goto out;
+
+	err = -EINVAL;
+	if (addrlen != sizeof(struct sockaddr_in))
+		goto out;
+
+	err = move_addr_to_kernel(uservaddr, addrlen, &addr);
+	if (err < 0)
+		goto out;
+
+	switch (op)
+	{
+		case VE_IP_ADD:
+			ve = get_ve_by_id(veid);
+			err = -ESRCH;
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			if (ve->is_running)
+				err = veip_entry_add(ve, &addr);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+
+		case VE_IP_DEL:
+			err = veip_entry_del(veid, &addr);
+			break;
+		default:
+			err = -EINVAL;
+	}
+
+out:
+	return err;
+}
+
+int venet_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	int err;
+
+	err = -ENOTTY;
+	switch(cmd) {
+	    case VENETCTL_VE_IP_MAP: {
+			struct vzctl_ve_ip_map s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void *)arg, sizeof(s)))
+				break;
+			err = real_ve_ip_map(s.veid, s.op, s.addr, s.addrlen);
+		}
+		break;
+	}
+	return err;
+}
+
+static struct vzioctlinfo venetcalls = {
+	type: VENETCTLTYPE,
+	func: venet_ioctl,
+	owner: THIS_MODULE,
+};
+
+int venet_dev_start(struct ve_struct *env)
+{
+	struct net_device *dev_venet;
+	int err;
+
+	dev_venet = alloc_netdev(0, "venet%d", venet_setup);
+	if (!dev_venet)
+		return -ENOMEM;
+	err = dev_alloc_name(dev_venet, dev_venet->name);
+	if (err<0)
+		goto err;
+	if ((err = register_netdev(dev_venet)) != 0)
+		goto err;
+	env->_venet_dev = dev_venet;
+	return 0;
+err:
+	free_netdev(dev_venet);
+	printk(KERN_ERR "VENET initialization error err=%d\n", err);
+	return err;
+}
+
+static int venet_start(unsigned int hooknum, void *data)
+{
+	struct ve_struct *env;
+	int err;
+
+	env = (struct ve_struct *)data;
+	if (env->veip)
+		return -EEXIST;
+
+	err = veip_start(env);
+	if (err < 0)
+		return err;
+
+	err = venet_dev_start(env);
+	if (err)
+		goto err_free;
+	return 0;
+
+err_free:
+	veip_stop(env);
+	return err;
+}
+
+static int venet_stop(unsigned int hooknum, void *data)
+{
+	struct ve_struct *env;
+	env = (struct ve_struct *)data;
+
+	if (env->_venet_dev != NULL) {
+		unregister_netdev(env->_venet_dev);
+		free_netdev(env->_venet_dev);
+	}
+	env->_venet_dev = NULL;
+
+	veip_stop(env);
+	return 0;
+}
+
+#define VE_HOOK_PRI_NET		0
+
+static struct ve_hook venet_ve_hook_init = {
+	hook:	venet_start,
+	undo:	venet_stop,
+	hooknum: VE_HOOK_INIT,
+	priority: VE_HOOK_PRI_NET,
+	owner: THIS_MODULE,
+};
+
+static struct ve_hook venet_ve_hook_fini = {
+	hook:	venet_stop,
+	hooknum: VE_HOOK_FINI,
+	priority: VE_HOOK_PRI_NET,
+	owner: THIS_MODULE,
+};
+
+__init int venet_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *de;
+#endif
+	int i, err;
+
+	if (get_ve0()->_venet_dev != NULL)
+		return -EEXIST;
+
+	for (i = 0; i < VEIP_HASH_SZ; i++)
+		INIT_LIST_HEAD(ip_entry_hash_table + i);
+
+	err = venet_start(VE_HOOK_INIT, (void *)get_ve0());
+	if (err)
+		return err;
+
+#ifdef CONFIG_PROC_FS
+	de = create_proc_glob_entry_mod("vz/veinfo",
+			S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_veinfo_operations;
+	else
+		printk(KERN_WARNING "venet: can't make veinfo proc entry\n");
+
+	de = create_proc_entry_mod("vz/veip", 
+			S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_veip_operations;
+	else
+		printk(KERN_WARNING "venet: can't make veip proc entry\n");
+#endif
+
+	ve_hook_register(&venet_ve_hook_init);
+	ve_hook_register(&venet_ve_hook_fini);
+	vzioctl_register(&venetcalls);
+	return 0;
+}
+
+static __exit void veip_cleanup(void)
+{
+	int i;
+	struct veip_struct *veip;
+
+	write_lock_irq(&veip_hash_lock);
+	for (i = 0; i < VEIP_HASH_SZ; i++)
+		while (!list_empty(ip_entry_hash_table + i)) {
+			struct ip_entry_struct *entry;
+
+			entry = list_first_entry(ip_entry_hash_table + i,
+					struct ip_entry_struct, ip_hash);
+			list_del(&entry->ip_hash);
+			kfree(entry);
+		}
+
+	list_for_each_entry(veip, &veip_lh, list)
+		veip_list_cleanup(veip);
+	while (!list_empty(&veip_lh)) {
+		veip = list_first_entry(&veip_lh, struct veip_struct, list);
+		list_del(&veip->list);
+		kfree(veip);
+	}
+	write_unlock_irq(&veip_hash_lock);
+}
+
+__exit void venet_exit(void)
+{
+	struct net_device *dev_venet;
+
+	vzioctl_unregister(&venetcalls);
+	ve_hook_unregister(&venet_ve_hook_fini);
+	ve_hook_unregister(&venet_ve_hook_init);
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("vz/veip", NULL);
+	remove_proc_entry("vz/veinfo", NULL);
+#endif
+
+	dev_venet = get_ve0()->_venet_dev;
+	if (dev_venet != NULL) {
+		get_ve0()->_venet_dev = NULL;
+		unregister_netdev(dev_venet);
+		free_netdev(dev_venet);
+	}
+	veip_stop(get_ve0());
+
+	veip_cleanup();
+}
+
+module_init(venet_init);
+module_exit(venet_exit);
diff -Nurap linux-2.6.9-100.orig/drivers/net/veth.c linux-2.6.9-ve023stab054/drivers/net/veth.c
--- linux-2.6.9-100.orig/drivers/net/veth.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/drivers/net/veth.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,613 @@
+/*
+ *  veth.c
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * Virtual ethernet device used to change VE ownership on packets
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/if_ether.h>	/* For the statistics structure. */
+#include <linux/if_arp.h>	/* For ARPHRD_ETHER */
+#include <linux/ve_proto.h>
+#include <linux/vzctl.h>
+#include <linux/vzctl_veth.h>
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/vzcalluser.h>
+#include <linux/nfcalls.h>
+
+struct veth_struct
+{
+	struct net_device_stats stats;
+	struct net_device	*pair;
+	struct list_head	hwaddr_list;
+	struct net_device_stats	*real_stats;
+};
+
+static struct list_head veth_hwaddr_list;
+static rwlock_t ve_hwaddr_lock = RW_LOCK_UNLOCKED;
+static DECLARE_MUTEX(hwaddr_sem);
+
+#define veth_from_netdev(dev) \
+	((struct veth_struct *)(netdev_priv(dev)))
+#define veth_to_netdev(veth) \
+	((struct net_device*)((char*)veth - \
+	(unsigned long)netdev_priv(NULL)))
+
+static inline struct net_device_stats *
+veth_stats(struct net_device *dev, int cpuid)
+{
+	return per_cpu_ptr(veth_from_netdev(dev)->real_stats, cpuid);
+}
+
+struct net_device * veth_dev_start(char *dev_addr, char *name);
+
+struct veth_struct *hwaddr_entry_lookup(char *name)
+{
+	struct veth_struct *entry;
+	struct list_head *tmp;
+
+	list_for_each(tmp, &veth_hwaddr_list) {
+		entry = list_entry(tmp, struct veth_struct, hwaddr_list);
+		BUG_ON(entry->pair == NULL);
+		if (strncmp(name, entry->pair->name, IFNAMSIZ) == 0)
+			return entry;
+	}
+	return NULL;
+}
+
+int veth_entry_add(struct ve_struct *ve, char *dev_addr, char *name,
+		char *dev_addr_ve, char *name_ve)
+{
+	struct net_device *dev_ve;
+	struct net_device *dev_ve0;
+	struct ve_struct *old_env;
+	char dev_name[IFNAMSIZ];
+	int err;
+
+	down(&hwaddr_sem);
+
+	if (name[0] == '\0')
+		snprintf(dev_name, sizeof(dev_name), "vz%d.%%d", ve->veid);
+	else {
+		memcpy(dev_name, name, IFNAMSIZ);
+		dev_name[IFNAMSIZ - 1] = '\0';
+	}
+	dev_ve0 = veth_dev_start(dev_addr, dev_name);
+	if (IS_ERR(dev_ve0)) {
+		err = PTR_ERR(dev_ve0);
+		goto err;
+	}
+
+	old_env = set_exec_env(ve);
+	if (name_ve[0] == '\0')
+		sprintf(dev_name, "eth%%d");
+	else {
+		memcpy(dev_name, name_ve, IFNAMSIZ);
+		dev_name[IFNAMSIZ - 1] = '\0';
+	}
+	dev_ve = veth_dev_start(dev_addr_ve, dev_name);
+	if (IS_ERR(dev_ve)) {
+		err = PTR_ERR(dev_ve);
+		goto err_ve;
+	}
+	set_exec_env(old_env);
+	veth_from_netdev(dev_ve)->pair = dev_ve0;
+	veth_from_netdev(dev_ve0)->pair = dev_ve;
+
+	write_lock(&ve_hwaddr_lock);
+	list_add(&(veth_from_netdev(dev_ve)->hwaddr_list), &veth_hwaddr_list);
+	write_unlock(&ve_hwaddr_lock);
+
+	up(&hwaddr_sem);
+	return 0;
+
+err_ve:
+	set_exec_env(old_env);
+	unregister_netdev(dev_ve0);
+err:
+	up(&hwaddr_sem);
+	return err;
+}
+
+void veth_pair_del(struct ve_struct *env, struct veth_struct *entry)
+{
+	struct net_device *dev;
+	struct ve_struct *old_env;
+
+	write_lock(&ve_hwaddr_lock);
+	list_del(&entry->hwaddr_list);
+	write_unlock(&ve_hwaddr_lock);
+
+	dev = entry->pair;
+	BUG_ON(entry->pair == NULL);
+
+	veth_from_netdev(dev)->pair = NULL;
+	entry->pair = NULL;
+	rtnl_lock();
+	old_env = set_exec_env(dev->owner_env);
+	dev_close(dev);
+
+	/*
+	 * Now device from VE0 does not send or receive anything,
+	 * i.e. dev->hard_start_xmit won't be called.
+	 */
+	set_exec_env(env);
+	unregister_netdevice(veth_to_netdev(entry));
+	set_exec_env(dev->owner_env);
+	unregister_netdevice(dev);
+	set_exec_env(old_env);
+	rtnl_unlock();
+}
+
+int veth_entry_del(struct ve_struct *ve, char *name)
+{
+	struct veth_struct *found;
+	int err;
+
+	err = -ENODEV;
+	down(&hwaddr_sem);
+	found = hwaddr_entry_lookup(name);
+	if (found == NULL)
+		goto out;
+	if (veth_to_netdev(found)->owner_env != ve)
+		goto out;
+
+	err = 0;
+	veth_pair_del(ve, found);
+
+out:
+	up(&hwaddr_sem);
+	return err;
+}
+
+/*
+ * Device functions
+ */
+
+static int veth_open(struct net_device *dev)
+{
+	return 0;
+}
+
+static int veth_close(struct net_device *master)
+{
+	return 0;
+}
+
+static void veth_destructor(struct net_device *dev)
+{
+	free_percpu(veth_from_netdev(dev)->real_stats);
+	free_netdev(dev);
+}
+
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+	int i;
+	struct net_device_stats *stats;
+
+	stats = &veth_from_netdev(dev)->stats;
+	memset(stats, 0, sizeof(struct net_device_stats));
+	for (i=0; i < NR_CPUS; i++) {
+		struct net_device_stats *dev_stats;
+
+		if (!cpu_possible(i)) 
+			continue;
+		dev_stats = veth_stats(dev, i);
+		stats->rx_bytes   += dev_stats->rx_bytes;
+		stats->tx_bytes   += dev_stats->tx_bytes;
+		stats->rx_packets += dev_stats->rx_packets;
+		stats->tx_packets += dev_stats->tx_packets;
+	}
+
+	return stats;
+}
+
+/*
+ * The higher levels take care of making this non-reentrant (it's
+ * called with bh's disabled).
+ */
+static int veth_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats;
+	struct net_device *rcv = NULL;
+	struct veth_struct *entry;
+	int length;
+
+	stats = veth_stats(dev, smp_processor_id());
+	if (unlikely(get_exec_env()->disable_net))
+		goto outf;
+
+	skb_orphan(skb);
+
+	entry = veth_from_netdev(dev);
+	rcv = entry->pair;
+	if (!rcv)
+		/* VE going down */
+		goto outf;
+
+	if (!(rcv->flags & IFF_UP)) {
+		/* Target VE does not want to receive packets */
+		goto outf;
+	}
+
+	if (unlikely(rcv->owner_env->disable_net))
+		goto outf;
+
+	skb->owner_env = rcv->owner_env;
+
+	skb->dev = rcv;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, rcv);
+
+	dst_release(skb->dst);
+	skb->dst = NULL;
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_put(skb->nfct);
+	skb->nfct = NULL;
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 0;
+#endif
+#endif
+	length = skb->len;
+
+	netif_rx(skb);
+
+	stats->tx_bytes += length;
+	stats->tx_packets++;
+	if (rcv) {
+		struct net_device_stats *rcv_stats;
+		rcv_stats = veth_stats(rcv, smp_processor_id());
+		rcv_stats->rx_bytes += length;
+		rcv_stats->rx_packets++;
+	}
+
+	return 0;
+
+outf:
+	kfree_skb(skb);
+	stats->tx_dropped++;
+	return 0;
+}
+
+int veth_init_dev(struct net_device *dev)
+{
+	dev->hard_start_xmit = veth_xmit;
+	dev->get_stats = get_stats;
+	dev->open = veth_open;
+	dev->stop = veth_close;
+	dev->destructor = veth_destructor;
+
+	ether_setup(dev);
+
+	/* remove the change_mtu handler installed by ether_setup() */
+	dev->change_mtu	= NULL;
+
+	dev->tx_queue_len = 0;
+
+	veth_from_netdev(dev)->real_stats =
+		alloc_percpu(struct net_device_stats);
+	if (veth_from_netdev(dev)->real_stats == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void veth_setup(struct net_device *dev)
+{
+	dev->init = veth_init_dev;
+	/*
+	 * No other features are enabled, because:
+	 *  - checksumming is required, and nobody else will do our job
+	 */
+	dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL | NETIF_F_LLTX;
+}
+
+#ifdef CONFIG_PROC_FS
+#define ADDR_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
+#define ADDR_ARG(x) (x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5]
+static int vehwaddr_seq_show(struct seq_file *m, void *v)
+{
+	struct list_head *p;
+	struct veth_struct *entry;
+
+	p = (struct list_head *)v;
+	if (p == &veth_hwaddr_list) {
+		seq_puts(m, "Version: 1.0\n");
+		return 0;
+	}
+	entry = list_entry(p, struct veth_struct, hwaddr_list);
+	seq_printf(m, ADDR_FMT " %16s ",
+			ADDR_ARG(entry->pair->dev_addr), entry->pair->name);
+	seq_printf(m, ADDR_FMT " %16s %10u\n",
+			ADDR_ARG(veth_to_netdev(entry)->dev_addr),
+			veth_to_netdev(entry)->name,
+			VEID(veth_to_netdev(entry)->owner_env));
+	return 0;
+}
+
+static void *vehwaddr_seq_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t l;
+	struct list_head *p;
+
+	l = *pos;
+	read_lock(&ve_hwaddr_lock);
+	if (l == 0)
+		return &veth_hwaddr_list;
+	list_for_each(p, &veth_hwaddr_list) {
+		if (--l == 0)
+			return p;
+	}
+	return NULL;
+}
+
+static void *vehwaddr_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct list_head *p;
+
+	p = (struct list_head *)v;
+	(*pos)++;
+	return p->next == &veth_hwaddr_list ? NULL : p->next;
+}
+
+static void vehwaddr_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&ve_hwaddr_lock);
+}
+
+static struct seq_operations vehwaddr_seq_op = {
+	.start 	= vehwaddr_seq_start,
+	.next	= vehwaddr_seq_next,
+	.stop	= vehwaddr_seq_stop,
+	.show	= vehwaddr_seq_show
+};
+
+static int vehwaddr_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &vehwaddr_seq_op);
+}
+
+static struct file_operations proc_vehwaddr_operations = {
+	.open		= vehwaddr_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release
+};
+#endif
+
+int real_ve_hwaddr(envid_t veid, int op,
+		unsigned char *dev_addr, int addrlen, char *name,
+		unsigned char *dev_addr_ve, int addrlen_ve, char *name_ve)
+{
+	int err;
+	struct ve_struct *ve;
+	char ve_addr[ETH_ALEN];
+
+	err = -EPERM;
+	if (!capable(CAP_NET_ADMIN))
+		goto out;
+
+	err = -EINVAL;
+	switch (op)
+	{
+		case VE_ETH_ADD:
+			if (addrlen != ETH_ALEN)
+				goto out;
+			if (addrlen_ve != ETH_ALEN && addrlen_ve != 0)
+				goto out;
+			/* If no VE-side addr is given, derive it by setting bit 0x80 in dev_addr[3] */
+			if (addrlen_ve == 0 && (dev_addr[3] & 0x80))
+				goto out;
+			if (addrlen_ve == 0) {
+				memcpy(ve_addr, dev_addr, ETH_ALEN);
+				ve_addr[3] |= 0x80;
+			} else {
+				memcpy(ve_addr, dev_addr_ve, ETH_ALEN);
+			}
+
+			ve = get_ve_by_id(veid);
+			err = -ESRCH;
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			if (ve->is_running)
+				err = veth_entry_add(ve, dev_addr, name,
+						ve_addr, name_ve);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+
+		case VE_ETH_DEL:
+			if (name[0] == '\0')
+				goto out;
+			ve = get_ve_by_id(veid);
+			err = -ESRCH;
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			if (ve->is_running)
+				err = veth_entry_del(ve, name);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+	}
+
+out:
+	return err;
+}
+
+int veth_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	int err;
+
+	err = -ENOTTY;
+	switch(cmd) {
+	    case VETHCTL_VE_HWADDR: {
+			struct vzctl_ve_hwaddr s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void *)arg, sizeof(s)))
+				break;
+			err = real_ve_hwaddr(s.veid, s.op,
+					s.dev_addr, s.addrlen, s.dev_name,
+					s.dev_addr_ve, s.addrlen_ve, s.dev_name_ve);
+		}
+		break;
+	}
+	return err;
+}
+
+static struct vzioctlinfo vethcalls = {
+	.type	= VETHCTLTYPE,
+	.func	= veth_ioctl,
+	.owner	= THIS_MODULE,
+};
+
+struct net_device * veth_dev_start(char *dev_addr, char *name)
+{
+	struct net_device *dev;
+	int err;
+
+	dev = alloc_netdev(sizeof(struct veth_struct), name, veth_setup);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+	if (strchr(dev->name, '%')) {
+		err = dev_alloc_name(dev, dev->name);
+		if (err < 0)
+			goto err;
+	}
+	if ((err = register_netdev(dev)) != 0)
+		goto err;
+
+	memcpy(dev->dev_addr, dev_addr, ETH_ALEN);
+	dev->addr_len = ETH_ALEN;
+
+	return dev;
+err:
+	free_netdev(dev);
+	printk(KERN_ERR "%s initialization error err=%d\n", name, err);
+	return ERR_PTR(err);
+}
+
+static int veth_stop(unsigned int hooknum, void *data)
+{
+	struct ve_struct *env;
+	struct list_head *tmp, *n;
+
+	env = (struct ve_struct *)data;
+	down(&hwaddr_sem);
+	list_for_each_safe(tmp, n, &veth_hwaddr_list) {
+		struct veth_struct *entry;
+		entry = list_entry(tmp, struct veth_struct, hwaddr_list);
+		if (VEID(env) != VEID(veth_to_netdev(entry)->owner_env))
+			continue;
+
+		veth_pair_del(env, entry);
+	}
+	up(&hwaddr_sem);
+	return 0;
+}
+
+#define VE_HOOK_PRI_NET		0
+
+static struct ve_hook veth_ve_hook_fini = {
+	.hook		= veth_stop,
+	.hooknum	= VE_HOOK_FINI,
+	.priority	= VE_HOOK_PRI_NET,
+	.owner		= THIS_MODULE,
+};
+
+__init int veth_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *de;
+#endif
+
+	INIT_LIST_HEAD(&veth_hwaddr_list);
+
+#ifdef CONFIG_PROC_FS
+	de = create_proc_entry_mod("vz/veth",
+			S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_vehwaddr_operations;
+	else
+		printk(KERN_WARNING "veth: can't make vehwaddr proc entry\n");
+#endif
+
+	ve_hook_register(&veth_ve_hook_fini);
+	vzioctl_register(&vethcalls);
+	KSYMRESOLVE(veth_open);
+	KSYMMODRESOLVE(vzethdev);
+	return 0;
+}
+
+__exit void veth_exit(void)
+{
+	struct veth_struct *entry;
+	struct list_head *tmp, *n;
+	struct ve_struct *ve;
+
+	KSYMMODUNRESOLVE(vzethdev);
+	KSYMUNRESOLVE(veth_open);
+	vzioctl_unregister(&vethcalls);
+	ve_hook_unregister(&veth_ve_hook_fini);
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("vz/veth", NULL);
+#endif
+
+	down(&hwaddr_sem);
+	list_for_each_safe(tmp, n, &veth_hwaddr_list) {
+		entry = list_entry(tmp, struct veth_struct, hwaddr_list);
+		ve = get_ve(veth_to_netdev(entry)->owner_env);
+
+		veth_pair_del(ve, entry);
+
+		put_ve(ve);
+	}
+	up(&hwaddr_sem);
+}
+
+module_init(veth_init);
+module_exit(veth_exit);
+
+MODULE_AUTHOR("Andrey Mirkin <amirkin@sw.ru>");
+MODULE_DESCRIPTION("Virtuozzo Virtual Ethernet Device");
+MODULE_LICENSE("GPL v2");
+
diff -Nurap linux-2.6.9-100.orig/drivers/net/wireless/airo.c linux-2.6.9-ve023stab054/drivers/net/wireless/airo.c
--- linux-2.6.9-100.orig/drivers/net/wireless/airo.c	2004-10-19 01:53:21.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/net/wireless/airo.c	2011-06-15 19:26:18.000000000 +0400
@@ -2915,8 +2915,8 @@ static int airo_thread(void *data) {
 			flush_signals(current);
 
 		/* make swsusp happy with our thread */
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
 
 		if (test_bit(JOB_DIE, &ai->flags))
 			break;
diff -Nurap linux-2.6.9-100.orig/drivers/pci/hotplug/pci_hotplug_core.c linux-2.6.9-ve023stab054/drivers/pci/hotplug/pci_hotplug_core.c
--- linux-2.6.9-100.orig/drivers/pci/hotplug/pci_hotplug_core.c	2004-10-19 01:54:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/pci/hotplug/pci_hotplug_core.c	2011-06-15 19:26:22.000000000 +0400
@@ -568,7 +568,7 @@ int pci_hp_register (struct hotplug_slot
 	if ((slot->info == NULL) || (slot->ops == NULL))
 		return -EINVAL;
 
-	kobject_set_name(&slot->kobj, slot->name);
+	kobject_set_name(&slot->kobj, "%s", slot->name);
 	kobj_set_kset_s(slot, pci_hotplug_slots_subsys);
 
 	/* this can fail if we have already registered a slot with the same name */
diff -Nurap linux-2.6.9-100.orig/drivers/pci/hotplug.c linux-2.6.9-ve023stab054/drivers/pci/hotplug.c
--- linux-2.6.9-100.orig/drivers/pci/hotplug.c	2004-10-19 01:55:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/pci/hotplug.c	2011-06-15 19:26:22.000000000 +0400
@@ -27,36 +27,35 @@ int pci_hotplug (struct device *dev, cha
 
 	scratch = buffer;
 
-	/* stuff we want to pass to /sbin/hotplug */
-	envp[i++] = scratch;
-	length += scnprintf (scratch, buffer_size - length, "PCI_CLASS=%04X",
-			    pdev->class);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+
+	if (add_hotplug_env_var(envp, num_envp, &i,
+				buffer, buffer_size, &length,
+				"PCI_CLASS=%04X", pdev->class))
+		return -ENOMEM;
+
+	if (add_hotplug_env_var(envp, num_envp, &i,
+				buffer, buffer_size, &length,
+				"PCI_ID=%04X:%04X", pdev->vendor, pdev->device))
 		return -ENOMEM;
-	++length;
-	scratch += length;
 
-	envp[i++] = scratch;
-	length += scnprintf (scratch, buffer_size - length, "PCI_ID=%04X:%04X",
-			    pdev->vendor, pdev->device);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	if (add_hotplug_env_var(envp, num_envp, &i,
+				buffer, buffer_size, &length,
+				"PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor,
+				pdev->subsystem_device))
 		return -ENOMEM;
-	++length;
-	scratch += length;
 
-	envp[i++] = scratch;
-	length += scnprintf (scratch, buffer_size - length,
-			    "PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor,
-			    pdev->subsystem_device);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	if (add_hotplug_env_var(envp, num_envp, &i,
+				buffer, buffer_size, &length,
+				"PCI_SLOT_NAME=%s", pci_name(pdev)))
 		return -ENOMEM;
-	++length;
-	scratch += length;
 
-	envp[i++] = scratch;
-	length += scnprintf (scratch, buffer_size - length, "PCI_SLOT_NAME=%s",
-			    pci_name(pdev));
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	if (add_hotplug_env_var(envp, num_envp, &i,
+				buffer, buffer_size, &length,
+				"MODALIAS=pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02x",
+				pdev->vendor, pdev->device,
+				pdev->subsystem_vendor, pdev->subsystem_device,
+				(u8)(pdev->class >> 16), (u8)(pdev->class >> 8),
+				(u8)(pdev->class)))
 		return -ENOMEM;
 
 	envp[i] = NULL;
diff -Nurap linux-2.6.9-100.orig/drivers/pci/probe.c linux-2.6.9-ve023stab054/drivers/pci/probe.c
--- linux-2.6.9-100.orig/drivers/pci/probe.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/pci/probe.c	2011-06-15 19:26:19.000000000 +0400
@@ -27,6 +27,7 @@ LIST_HEAD(pci_root_buses);
 EXPORT_SYMBOL(pci_root_buses);
 
 LIST_HEAD(pci_devices);
+EXPORT_SYMBOL(pci_devices);
 
 extern spinlock_t pci_bus_lock;
 
diff -Nurap linux-2.6.9-100.orig/drivers/pcmcia/cs.c linux-2.6.9-ve023stab054/drivers/pcmcia/cs.c
--- linux-2.6.9-100.orig/drivers/pcmcia/cs.c	2004-10-19 01:53:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/pcmcia/cs.c	2011-06-15 19:26:18.000000000 +0400
@@ -714,8 +714,8 @@ static int pccardd(void *__skt)
 		}
 
 		schedule();
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
 
 		if (!skt->thread)
 			break;
diff -Nurap linux-2.6.9-100.orig/drivers/pnp/pnpbios/core.c linux-2.6.9-ve023stab054/drivers/pnp/pnpbios/core.c
--- linux-2.6.9-100.orig/drivers/pnp/pnpbios/core.c	2011-06-09 19:22:37.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/pnp/pnpbios/core.c	2011-06-15 19:26:22.000000000 +0400
@@ -56,7 +56,7 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/slab.h>
-#include <linux/kmod.h>
+#include <linux/kobject_uevent.h>
 #include <linux/completion.h>
 #include <linux/spinlock.h>
 #include <linux/dmi.h>
diff -Nurap linux-2.6.9-100.orig/drivers/s390/crypto/z90main.c linux-2.6.9-ve023stab054/drivers/s390/crypto/z90main.c
--- linux-2.6.9-100.orig/drivers/s390/crypto/z90main.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/s390/crypto/z90main.c	2011-06-15 19:26:22.000000000 +0400
@@ -34,6 +34,7 @@
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/kobject_uevent.h>
 #include <linux/proc_fs.h>
 #include <linux/syscalls.h>
 #include <linux/version.h>
diff -Nurap linux-2.6.9-100.orig/drivers/sbus/char/bbc_envctrl.c linux-2.6.9-ve023stab054/drivers/sbus/char/bbc_envctrl.c
--- linux-2.6.9-100.orig/drivers/sbus/char/bbc_envctrl.c	2004-10-19 01:55:36.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/sbus/char/bbc_envctrl.c	2011-06-15 19:26:19.000000000 +0400
@@ -613,7 +613,7 @@ void bbc_envctrl_cleanup(void)
 			int found = 0;
 
 			read_lock(&tasklist_lock);
-			for_each_process(p) {
+			for_each_process_all(p) {
 				if (p == kenvctrld_task) {
 					found = 1;
 					break;
diff -Nurap linux-2.6.9-100.orig/drivers/sbus/char/envctrl.c linux-2.6.9-ve023stab054/drivers/sbus/char/envctrl.c
--- linux-2.6.9-100.orig/drivers/sbus/char/envctrl.c	2004-10-19 01:53:22.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/sbus/char/envctrl.c	2011-06-15 19:26:19.000000000 +0400
@@ -1151,7 +1151,7 @@ static void __exit envctrl_cleanup(void)
 			int found = 0;
 
 			read_lock(&tasklist_lock);
-			for_each_process(p) {
+			for_each_process_all(p) {
 				if (p == kenvctrld_task) {
 					found = 1;
 					break;
diff -Nurap linux-2.6.9-100.orig/drivers/scsi/aic7xxx/aic79xx_osm.c linux-2.6.9-ve023stab054/drivers/scsi/aic7xxx/aic79xx_osm.c
--- linux-2.6.9-100.orig/drivers/scsi/aic7xxx/aic79xx_osm.c	2011-06-09 19:22:47.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/scsi/aic7xxx/aic79xx_osm.c	2011-06-15 19:26:18.000000000 +0400
@@ -2597,7 +2597,6 @@ ahd_linux_dv_thread(void *data)
 	sprintf(current->comm, "ahd_dv_%d", ahd->unit);
 #else
 	daemonize("ahd_dv_%d", ahd->unit);
-	current->flags |= PF_FREEZE;
 #endif
 	unlock_kernel();
 
diff -Nurap linux-2.6.9-100.orig/drivers/scsi/aic7xxx/aic7xxx_osm.c linux-2.6.9-ve023stab054/drivers/scsi/aic7xxx/aic7xxx_osm.c
--- linux-2.6.9-100.orig/drivers/scsi/aic7xxx/aic7xxx_osm.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/scsi/aic7xxx/aic7xxx_osm.c	2011-06-15 19:26:18.000000000 +0400
@@ -2300,7 +2300,6 @@ ahc_linux_dv_thread(void *data)
 	sprintf(current->comm, "ahc_dv_%d", ahc->unit);
 #else
 	daemonize("ahc_dv_%d", ahc->unit);
-	current->flags |= PF_FREEZE;
 #endif
 	unlock_kernel();
 
diff -Nurap linux-2.6.9-100.orig/drivers/scsi/lpfc/Makefile linux-2.6.9-ve023stab054/drivers/scsi/lpfc/Makefile
--- linux-2.6.9-100.orig/drivers/scsi/lpfc/Makefile	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/scsi/lpfc/Makefile	2011-06-15 19:26:19.000000000 +0400
@@ -6,7 +6,10 @@ lpfc-objs := lpfc_mem.o lpfc_sli.o lpfc_
 lpfc_hbadisc.o lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o lpfc_scsiport.o \
 lpfc_fcp.o lpfc_menlo.o
 
-lpfcdfc-objs := lpfc_cdev.o lpfc_ct.o lpfc_debug_ioctl.o lpfc_els.o	\
-lpfc_hbaapi_ioctl.o lpfc_hbadisc.o lpfc_init.o lpfc_mbox.o lpfc_mem.o	\
-lpfc_misc.o lpfc_nportdisc.o lpfc_scsiport.o lpfc_sli.o lpfc_menlo.o	\
-lpfc_util_ioctl.o
+lpfcdfc-objs := lpfc_cdev.o lpfc_debug_ioctl.o lpfc_hbaapi_ioctl.o \
+lpfc_misc.o lpfc_util_ioctl.o
+
+ifeq ($(CONFIG_SCSI_LPFC), m)
+lpfcdfc-objs += lpfc_ct.o lpfc_els.o lpfc_hbadisc.o lpfc_init.o lpfc_mbox.o \
+lpfc_mem.o lpfc_nportdisc.o lpfc_scsiport.o lpfc_sli.o lpfc_menlo.o
+endif
diff -Nurap linux-2.6.9-100.orig/drivers/scsi/lpfc/lpfc_fcp.c linux-2.6.9-ve023stab054/drivers/scsi/lpfc/lpfc_fcp.c
--- linux-2.6.9-100.orig/drivers/scsi/lpfc/lpfc_fcp.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/scsi/lpfc/lpfc_fcp.c	2011-06-15 19:26:19.000000000 +0400
@@ -60,7 +60,7 @@ static char *lpfc_drvr_name = LPFC_DRIVE
 
 static struct scsi_transport_template *lpfc_transport_template = NULL;
 
-static struct list_head lpfc_hba_list = LIST_HEAD_INIT(lpfc_hba_list);
+struct list_head lpfc_hba_list = LIST_HEAD_INIT(lpfc_hba_list);
 
 static const char *
 lpfc_info(struct Scsi_Host *host)
diff -Nurap linux-2.6.9-100.orig/drivers/serial/8250.c linux-2.6.9-ve023stab054/drivers/serial/8250.c
--- linux-2.6.9-100.orig/drivers/serial/8250.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/serial/8250.c	2011-06-15 19:26:18.000000000 +0400
@@ -20,27 +20,28 @@
  *  membase is an 'ioremapped' cookie.
  */
 #include <linux/config.h>
+#if defined(CONFIG_SERIAL_8250_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+#define SUPPORT_SYSRQ
+#endif
+
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/tty.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
 #include <linux/console.h>
 #include <linux/sysrq.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
 #include <linux/serial_reg.h>
+#include <linux/serial_core.h>
 #include <linux/serial.h>
 #include <linux/serialP.h>
-#include <linux/delay.h>
-#include <linux/device.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
 
-#if defined(CONFIG_SERIAL_8250_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
-#define SUPPORT_SYSRQ
-#endif
-
-#include <linux/serial_core.h>
 #include "8250.h"
 
 #include <linux/nmi.h>
@@ -890,18 +891,22 @@ receive_chars(struct uart_8250_port *up,
 	struct tty_struct *tty = up->port.info->tty;
 	unsigned char ch;
 	int max_count = 256;
+	char flag;
 
 	do {
+		/* The following is not allowed by the tty layer and
+		   unsafe. It should be fixed ASAP */
 		if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) {
-			spin_unlock(&up->port.lock);
-			tty->flip.work.func((void *)tty);
-			spin_lock(&up->port.lock);
-			if (tty->flip.count >= TTY_FLIPBUF_SIZE)
-				return; // if TTY_DONT_FLIP is set
+			if(tty->low_latency) {
+				spin_unlock(&up->port.lock);
+				tty_flip_buffer_push(tty);
+				spin_lock(&up->port.lock);
+			}
+			/* If this failed then we will throw away the
+			   bytes but must do so to clear interrupts */
 		}
 		ch = serial_inp(up, UART_RX);
-		*tty->flip.char_buf_ptr = ch;
-		*tty->flip.flag_buf_ptr = TTY_NORMAL;
+		flag = TTY_NORMAL;
 		up->port.icount.rx++;
 
 		if (unlikely(*status & (UART_LSR_BI | UART_LSR_PE |
@@ -941,31 +946,24 @@ receive_chars(struct uart_8250_port *up,
 #endif
 			if (*status & UART_LSR_BI) {
 				DEBUG_INTR("handling break....");
-				*tty->flip.flag_buf_ptr = TTY_BREAK;
+				flag = TTY_BREAK;
 			} else if (*status & UART_LSR_PE)
-				*tty->flip.flag_buf_ptr = TTY_PARITY;
+				flag = TTY_PARITY;
 			else if (*status & UART_LSR_FE)
-				*tty->flip.flag_buf_ptr = TTY_FRAME;
+				flag = TTY_FRAME;
 		}
 		if (uart_handle_sysrq_char(&up->port, ch, regs))
 			goto ignore_char;
-		if ((*status & up->port.ignore_status_mask) == 0) {
-			tty->flip.flag_buf_ptr++;
-			tty->flip.char_buf_ptr++;
-			tty->flip.count++;
-		}
+		if ((*status & up->port.ignore_status_mask) == 0)
+			tty_insert_flip_char(tty, ch, flag);
 		if ((*status & UART_LSR_OE) &&
-		    tty->flip.count < TTY_FLIPBUF_SIZE) {
+		    tty->flip.count < TTY_FLIPBUF_SIZE)
 			/*
 			 * Overrun is special, since it's reported
 			 * immediately, and doesn't affect the current
 			 * character.
 			 */
-			*tty->flip.flag_buf_ptr = TTY_OVERRUN;
-			tty->flip.flag_buf_ptr++;
-			tty->flip.char_buf_ptr++;
-			tty->flip.count++;
-		}
+			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
 	ignore_char:
 		*status = serial_inp(up, UART_LSR);
 	} while ((*status & UART_LSR_DR) && (max_count-- > 0));
diff -Nurap linux-2.6.9-100.orig/drivers/usb/core/devio.c linux-2.6.9-ve023stab054/drivers/usb/core/devio.c
--- linux-2.6.9-100.orig/drivers/usb/core/devio.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/usb/core/devio.c	2011-06-15 19:26:19.000000000 +0400
@@ -504,7 +504,7 @@ static int usbdev_open(struct inode *ino
 	INIT_LIST_HEAD(&ps->async_completed);
 	init_waitqueue_head(&ps->wait);
 	ps->discsignr = 0;
-	ps->disc_pid = current->pid;
+	ps->disc_pid = virt_pid(current);
 	ps->disc_uid = current->uid;
 	ps->disc_euid = current->euid;
 	ps->disccontext = NULL;
@@ -963,7 +963,7 @@ static int proc_submiturb(struct dev_sta
 		as->userbuffer = NULL;
 	as->signr = uurb.signr;
 	as->ifnum = ifnum;
-	as->pid = current->pid;
+	as->pid = virt_pid(current);
 	as->uid = current->uid;
 	as->euid = current->euid;
 	if (!(uurb.endpoint & USB_DIR_IN)) {
diff -Nurap linux-2.6.9-100.orig/drivers/usb/core/file.c linux-2.6.9-ve023stab054/drivers/usb/core/file.c
--- linux-2.6.9-100.orig/drivers/usb/core/file.c	2004-10-19 01:55:35.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/usb/core/file.c	2011-06-15 19:26:22.000000000 +0400
@@ -107,13 +107,6 @@ void usb_major_cleanup(void)
 	unregister_chrdev(USB_MAJOR, "usb");
 }
 
-static ssize_t show_dev(struct class_device *class_dev, char *buf)
-{
-	int minor = (int)(long)class_get_devdata(class_dev);
-	return print_dev_t(buf, MKDEV(USB_MAJOR, minor));
-}
-static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
-
 /**
  * usb_register_dev - register a USB device, and ask for a minor number
  * @intf: pointer to the usb_interface that is being registered
@@ -184,6 +177,7 @@ int usb_register_dev(struct usb_interfac
 	class_dev = kmalloc(sizeof(*class_dev), GFP_KERNEL);
 	if (class_dev) {
 		memset(class_dev, 0x00, sizeof(struct class_device));
+		class_dev->devt = MKDEV(USB_MAJOR, minor);
 		class_dev->class = &usb_class;
 		class_dev->dev = &intf->dev;
 
@@ -195,7 +189,6 @@ int usb_register_dev(struct usb_interfac
 		snprintf(class_dev->class_id, BUS_ID_SIZE, "%s", temp);
 		class_set_devdata(class_dev, (void *)(long)intf->minor);
 		class_device_register(class_dev);
-		class_device_create_file(class_dev, &class_device_attr_dev);
 		intf->class_dev = class_dev;
 	}
 exit:
diff -Nurap linux-2.6.9-100.orig/drivers/usb/core/hub.c linux-2.6.9-ve023stab054/drivers/usb/core/hub.c
--- linux-2.6.9-100.orig/drivers/usb/core/hub.c	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/usb/core/hub.c	2011-06-15 19:26:18.000000000 +0400
@@ -2562,8 +2562,8 @@ static int hub_thread(void *__unused)
 	do {
 		hub_events();
 		wait_event_interruptible(khubd_wait, !list_empty(&hub_event_list)); 
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
 	} while (!signal_pending(current));
 
 	pr_debug ("%s: khubd exiting\n", usbcore_name);
diff -Nurap linux-2.6.9-100.orig/drivers/usb/core/usb.c linux-2.6.9-ve023stab054/drivers/usb/core/usb.c
--- linux-2.6.9-100.orig/drivers/usb/core/usb.c	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/usb/core/usb.c	2011-06-15 19:26:22.000000000 +0400
@@ -563,7 +563,6 @@ static int usb_hotplug (struct device *d
 {
 	struct usb_interface *intf;
 	struct usb_device *usb_dev;
-	char *scratch;
 	int i = 0;
 	int length = 0;
 
@@ -590,8 +589,6 @@ static int usb_hotplug (struct device *d
 		return -ENODEV;
 	}
 
-	scratch = buffer;
-
 #ifdef	CONFIG_USB_DEVICEFS
 	/* If this is available, userspace programs can directly read
 	 * all the device descriptors we don't tell them about.  Or
@@ -599,37 +596,30 @@ static int usb_hotplug (struct device *d
 	 *
 	 * FIXME reduce hardwired intelligence here
 	 */
-	envp [i++] = scratch;
-	length += snprintf (scratch, buffer_size - length,
-			    "DEVICE=/proc/bus/usb/%03d/%03d",
-			    usb_dev->bus->busnum, usb_dev->devnum);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	if (add_hotplug_env_var(envp, num_envp, &i,
+				buffer, buffer_size, &length,
+				"DEVICE=/proc/bus/usb/%03d/%03d",
+				usb_dev->bus->busnum, usb_dev->devnum))
 		return -ENOMEM;
-	++length;
-	scratch += length;
 #endif
 
 	/* per-device configurations are common */
-	envp [i++] = scratch;
-	length += snprintf (scratch, buffer_size - length, "PRODUCT=%x/%x/%x",
-			    usb_dev->descriptor.idVendor,
-			    usb_dev->descriptor.idProduct,
-			    usb_dev->descriptor.bcdDevice);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	if (add_hotplug_env_var(envp, num_envp, &i,
+				buffer, buffer_size, &length,
+				"PRODUCT=%x/%x/%x",
+				usb_dev->descriptor.idVendor,
+				usb_dev->descriptor.idProduct,
+				usb_dev->descriptor.bcdDevice))
 		return -ENOMEM;
-	++length;
-	scratch += length;
 
 	/* class-based driver binding models */
-	envp [i++] = scratch;
-	length += snprintf (scratch, buffer_size - length, "TYPE=%d/%d/%d",
-			    usb_dev->descriptor.bDeviceClass,
-			    usb_dev->descriptor.bDeviceSubClass,
-			    usb_dev->descriptor.bDeviceProtocol);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	if (add_hotplug_env_var(envp, num_envp, &i,
+				buffer, buffer_size, &length,
+				"TYPE=%d/%d/%d",
+				usb_dev->descriptor.bDeviceClass,
+				usb_dev->descriptor.bDeviceSubClass,
+				usb_dev->descriptor.bDeviceProtocol))
 		return -ENOMEM;
-	++length;
-	scratch += length;
 
 	if (usb_dev->descriptor.bDeviceClass == 0) {
 		struct usb_host_interface *alt = intf->cur_altsetting;
@@ -638,19 +628,43 @@ static int usb_hotplug (struct device *d
 		 * agents are called for all interfaces, and can use
 		 * $DEVPATH/bInterfaceNumber if necessary.
 		 */
-		envp [i++] = scratch;
-		length += snprintf (scratch, buffer_size - length,
-			    "INTERFACE=%d/%d/%d",
-			    alt->desc.bInterfaceClass,
-			    alt->desc.bInterfaceSubClass,
-			    alt->desc.bInterfaceProtocol);
-		if ((buffer_size - length <= 0) || (i >= num_envp))
+		if (add_hotplug_env_var(envp, num_envp, &i,
+					buffer, buffer_size, &length,
+					"INTERFACE=%d/%d/%d",
+					alt->desc.bInterfaceClass,
+					alt->desc.bInterfaceSubClass,
+					alt->desc.bInterfaceProtocol))
 			return -ENOMEM;
-		++length;
-		scratch += length;
 
+		if (add_hotplug_env_var(envp, num_envp, &i,
+					buffer, buffer_size, &length,
+					"MODALIAS=usb:v%04Xp%04Xdl%04Xdh%04Xdc%02Xdsc%02Xdp%02Xic%02Xisc%02Xip%02X",
+					le16_to_cpu(usb_dev->descriptor.idVendor),
+					le16_to_cpu(usb_dev->descriptor.idProduct),
+					le16_to_cpu(usb_dev->descriptor.bcdDevice),
+					le16_to_cpu(usb_dev->descriptor.bcdDevice),
+					usb_dev->descriptor.bDeviceClass,
+					usb_dev->descriptor.bDeviceSubClass,
+					usb_dev->descriptor.bDeviceProtocol,
+					alt->desc.bInterfaceClass,
+					alt->desc.bInterfaceSubClass,
+					alt->desc.bInterfaceProtocol))
+			return -ENOMEM;
+	} else {
+		if (add_hotplug_env_var(envp, num_envp, &i,
+					buffer, buffer_size, &length,
+					"MODALIAS=usb:v%04Xp%04Xdl%04Xdh%04Xdc%02Xdsc%02Xdp%02Xic*isc*ip*",
+					le16_to_cpu(usb_dev->descriptor.idVendor),
+					le16_to_cpu(usb_dev->descriptor.idProduct),
+					le16_to_cpu(usb_dev->descriptor.bcdDevice),
+					le16_to_cpu(usb_dev->descriptor.bcdDevice),
+					usb_dev->descriptor.bDeviceClass,
+					usb_dev->descriptor.bDeviceSubClass,
+					usb_dev->descriptor.bDeviceProtocol))
+			return -ENOMEM;
 	}
-	envp[i++] = NULL;
+
+	envp[i] = NULL;
 
 	return 0;
 }
diff -Nurap linux-2.6.9-100.orig/drivers/w1/w1.c linux-2.6.9-ve023stab054/drivers/w1/w1.c
--- linux-2.6.9-100.orig/drivers/w1/w1.c	2004-10-19 01:53:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/drivers/w1/w1.c	2011-06-15 19:26:18.000000000 +0400
@@ -617,8 +617,8 @@ int w1_control(void *data)
 		timeout = w1_timeout*HZ;
 		do {
 			timeout = interruptible_sleep_on_timeout(&w1_control_wait, timeout);
-			if (current->flags & PF_FREEZE)
-				refrigerator(PF_FREEZE);
+			if (test_thread_flag(TIF_FREEZE))
+				refrigerator();
 		} while (!signal_pending(current) && (timeout > 0));
 
 		if (signal_pending(current))
@@ -690,8 +690,8 @@ int w1_process(void *data)
 		timeout = w1_timeout*HZ;
 		do {
 			timeout = interruptible_sleep_on_timeout(&dev->kwait, timeout);
-			if (current->flags & PF_FREEZE)
-				refrigerator(PF_FREEZE);
+			if (test_thread_flag(TIF_FREEZE))
+				refrigerator();
 		} while (!signal_pending(current) && (timeout > 0));
 
 		if (signal_pending(current))
diff -Nurap linux-2.6.9-100.orig/fs/afs/mntpt.c linux-2.6.9-ve023stab054/fs/afs/mntpt.c
--- linux-2.6.9-100.orig/fs/afs/mntpt.c	2004-10-19 01:53:23.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/afs/mntpt.c	2011-06-15 19:26:19.000000000 +0400
@@ -162,6 +162,7 @@ static struct vfsmount *afs_mntpt_do_aut
 	char *buf, *devname = NULL, *options = NULL;
 	filler_t *filler;
 	int ret;
+	struct file_system_type *fstype;
 
 	kenter("{%s}", mntpt->d_name.name);
 
@@ -210,7 +211,12 @@ static struct vfsmount *afs_mntpt_do_aut
 
 	/* try and do the mount */
 	kdebug("--- attempting mount %s -o %s ---", devname, options);
-	mnt = do_kern_mount("afs", 0, devname, options);
+	fstype = get_fs_type("afs");
+	ret = -ENODEV;
+	if (!fstype)
+		goto error;
+	mnt = do_kern_mount(fstype, 0, devname, options);
+	put_filesystem(fstype);
 	kdebug("--- mount result %p ---", mnt);
 
 	free_page((unsigned long) devname);
diff -Nurap linux-2.6.9-100.orig/fs/aio.c linux-2.6.9-ve023stab054/fs/aio.c
--- linux-2.6.9-100.orig/fs/aio.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/aio.c	2011-06-15 19:26:21.000000000 +0400
@@ -49,11 +49,13 @@ long aio_wakeups = 0; /* for testing onl
 
 /*------ sysctl variables----*/
 atomic_t aio_nr = ATOMIC_INIT(0);	/* current system wide number of aio requests */
+EXPORT_SYMBOL(aio_nr);
 unsigned aio_max_nr = 0x10000;	/* system wide maximum number of aio requests */
 /*----end sysctl variables---*/
 
 static kmem_cache_t	*kiocb_cachep;
 static kmem_cache_t	*kioctx_cachep;
+EXPORT_SYMBOL(kioctx_cachep);
 
 static struct workqueue_struct *aio_wq;
 
@@ -329,6 +331,7 @@ void wait_for_all_aios(struct kioctx *ct
 out:
 	spin_unlock_irq(&ctx->ctx_lock);
 }
+EXPORT_SYMBOL(wait_for_all_aios);
 
 /* wait_on_sync_kiocb:
  *	Waits on the given sync kiocb to complete.
@@ -896,6 +899,7 @@ static void aio_kick_handler(void *data)
 	if (requeue)
 		queue_work(aio_wq, &ctx->wq);
 }
+EXPORT_SYMBOL(aio_kick_handler);
 
 
 /*
diff -Nurap linux-2.6.9-100.orig/fs/autofs/autofs_i.h linux-2.6.9-ve023stab054/fs/autofs/autofs_i.h
--- linux-2.6.9-100.orig/fs/autofs/autofs_i.h	2004-10-19 01:54:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/autofs/autofs_i.h	2011-06-15 19:26:19.000000000 +0400
@@ -123,7 +123,7 @@ static inline struct autofs_sb_info *aut
    filesystem without "magic".) */
 
 static inline int autofs_oz_mode(struct autofs_sb_info *sbi) {
-	return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
+	return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp;
 }
 
 /* Hash operations */
diff -Nurap linux-2.6.9-100.orig/fs/autofs/init.c linux-2.6.9-ve023stab054/fs/autofs/init.c
--- linux-2.6.9-100.orig/fs/autofs/init.c	2004-10-19 01:54:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/autofs/init.c	2011-06-15 19:26:19.000000000 +0400
@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs
 	.name		= "autofs",
 	.get_sb		= autofs_get_sb,
 	.kill_sb	= kill_anon_super,
+	.fs_flags	= FS_VIRTUALIZED,
 };
 
 static int __init init_autofs_fs(void)
diff -Nurap linux-2.6.9-100.orig/fs/autofs/inode.c linux-2.6.9-ve023stab054/fs/autofs/inode.c
--- linux-2.6.9-100.orig/fs/autofs/inode.c	2011-06-09 19:22:53.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/autofs/inode.c	2011-06-15 19:26:19.000000000 +0400
@@ -76,7 +76,7 @@ static int parse_options(char *options, 
 
 	*uid = current->uid;
 	*gid = current->gid;
-	*pgrp = process_group(current);
+	*pgrp = virt_pgid(current);
 
 	*minproto = *maxproto = AUTOFS_PROTO_VERSION;
 
@@ -149,7 +149,7 @@ int autofs_fill_super(struct super_block
 	sbi->pipe = NULL;
 	sbi->catatonic = 1;
 	sbi->exp_timeout = 0;
-	sbi->oz_pgrp = process_group(current);
+	sbi->oz_pgrp = virt_pgid(current);
 	autofs_initialize_hash(&sbi->dirhash);
 	sbi->queues = NULL;
 	memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN);
diff -Nurap linux-2.6.9-100.orig/fs/autofs/root.c linux-2.6.9-ve023stab054/fs/autofs/root.c
--- linux-2.6.9-100.orig/fs/autofs/root.c	2004-10-19 01:54:08.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/autofs/root.c	2011-06-15 19:26:19.000000000 +0400
@@ -353,7 +353,7 @@ static int autofs_root_unlink(struct ino
 
 	/* This allows root to remove symlinks */
 	lock_kernel();
-	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) {
+	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) {
 		unlock_kernel();
 		return -EACCES;
 	}
@@ -540,7 +540,7 @@ static int autofs_root_ioctl(struct inod
 	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT )
 		return -ENOTTY;
 	
-	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
+	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
 		return -EPERM;
 	
 	switch(cmd) {
diff -Nurap linux-2.6.9-100.orig/fs/autofs4/autofs_i.h linux-2.6.9-ve023stab054/fs/autofs4/autofs_i.h
--- linux-2.6.9-100.orig/fs/autofs4/autofs_i.h	2011-06-09 19:22:59.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/autofs4/autofs_i.h	2011-06-15 19:26:19.000000000 +0400
@@ -132,7 +132,7 @@ static inline struct autofs_info *autofs
    filesystem without "magic".) */
 
 static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
-	return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
+	return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp;
 }
 
 /* Does a dentry have some pending activity? */
diff -Nurap linux-2.6.9-100.orig/fs/autofs4/init.c linux-2.6.9-ve023stab054/fs/autofs4/init.c
--- linux-2.6.9-100.orig/fs/autofs4/init.c	2011-06-09 19:22:53.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/autofs4/init.c	2011-06-15 19:26:19.000000000 +0400
@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs
 	.name		= "autofs",
 	.get_sb		= autofs_get_sb,
 	.kill_sb	= autofs4_kill_sb,
+	.fs_flags	= FS_VIRTUALIZED,
 };
 
 static int __init init_autofs4_fs(void)
diff -Nurap linux-2.6.9-100.orig/fs/autofs4/inode.c linux-2.6.9-ve023stab054/fs/autofs4/inode.c
--- linux-2.6.9-100.orig/fs/autofs4/inode.c	2011-06-09 19:22:59.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/autofs4/inode.c	2011-06-15 19:26:19.000000000 +0400
@@ -233,7 +233,7 @@ static int parse_options(char *options, 
 
 	*uid = current->uid;
 	*gid = current->gid;
-	*pgrp = process_group(current);
+	*pgrp = virt_pgid(current);
 
 	*minproto = AUTOFS_MIN_PROTO_VERSION;
 	*maxproto = AUTOFS_MAX_PROTO_VERSION;
@@ -332,7 +332,7 @@ int autofs4_fill_super(struct super_bloc
 	sbi->pipe = NULL;
 	sbi->catatonic = 1;
 	sbi->exp_timeout = 0;
-	sbi->oz_pgrp = process_group(current);
+	sbi->oz_pgrp = virt_pgid(current);
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
diff -Nurap linux-2.6.9-100.orig/fs/autofs4/root.c linux-2.6.9-ve023stab054/fs/autofs4/root.c
--- linux-2.6.9-100.orig/fs/autofs4/root.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/autofs4/root.c	2011-06-15 19:26:19.000000000 +0400
@@ -683,7 +683,7 @@ static int autofs4_dir_unlink(struct ino
 	struct autofs_info *p_ino;
 	
 	/* This allows root to remove symlinks */
-	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
+	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
 		return -EACCES;
 
 	if (atomic_dec_and_test(&ino->count)) {
@@ -730,7 +730,9 @@ static int autofs4_dir_rmdir(struct inod
 	if (list_empty(&ino->expiring))
 		list_add(&ino->expiring, &sbi->expiring_list);
 	spin_unlock(&sbi->lookup_lock);
+	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 
 	if (atomic_dec_and_test(&ino->count)) {
@@ -873,7 +875,7 @@ static int autofs4_root_ioctl(struct ino
 	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT )
 		return -ENOTTY;
 	
-	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
+	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
 		return -EPERM;
 	
 	switch(cmd) {
diff -Nurap linux-2.6.9-100.orig/fs/binfmt_aout.c linux-2.6.9-ve023stab054/fs/binfmt_aout.c
--- linux-2.6.9-100.orig/fs/binfmt_aout.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/binfmt_aout.c	2011-06-15 19:26:20.000000000 +0400
@@ -439,9 +439,11 @@ beyond_if:
 #endif
 	start_thread(regs, ex.a_entry, current->mm->start_stack);
 	if (unlikely(current->ptrace & PT_PTRACED)) {
-		if (current->ptrace & PT_TRACE_EXEC)
+		if (current->ptrace & PT_TRACE_EXEC) {
+			set_pn_state(current, PN_STOP_EXEC);
 			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
-		else
+			clear_pn_state(current);
+		} else
 			send_sig(SIGTRAP, current, 0);
 	}
 	return 0;
diff -Nurap linux-2.6.9-100.orig/fs/binfmt_elf.c linux-2.6.9-ve023stab054/fs/binfmt_elf.c
--- linux-2.6.9-100.orig/fs/binfmt_elf.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/binfmt_elf.c	2011-06-15 19:26:20.000000000 +0400
@@ -103,15 +103,17 @@ static int set_brk(unsigned long start, 
    be in memory */
 
 
-static void padzero(unsigned long elf_bss)
+static int padzero(unsigned long elf_bss)
 {
 	unsigned long nbyte;
 
 	nbyte = ELF_PAGEOFFSET(elf_bss);
 	if (nbyte) {
 		nbyte = ELF_MIN_ALIGN - nbyte;
-		clear_user((void __user *) elf_bss, nbyte);
+		if (clear_user((void __user *) elf_bss, nbyte))
+			return -EFAULT;
 	}
+	return 0;
 }
 
 /* Let's use some macros to make this stack manipulation a litle clearer */
@@ -127,7 +129,7 @@ static void padzero(unsigned long elf_bs
 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
 #endif
 
-static void
+static int
 create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
 		int interp_aout, unsigned long load_addr,
 		unsigned long interp_load_addr)
@@ -160,7 +162,8 @@ create_elf_tables(struct linux_binprm *b
 		p = (unsigned long)arch_align_stack((unsigned long)p);
 #endif
 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
-		__copy_to_user(u_platform, k_platform, len);
+		if (__copy_to_user(u_platform, k_platform, len))
+			return -EFAULT;
 	}
 
 	/* Create the ELF interpreter info */
@@ -222,7 +225,8 @@ create_elf_tables(struct linux_binprm *b
 #endif
 
 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
-	__put_user(argc, sp++);
+	if (__put_user(argc, sp++))
+		return -EFAULT;
 	if (interp_aout) {
 		argv = sp + 2;
 		envp = argv + argc + 1;
@@ -240,25 +244,29 @@ create_elf_tables(struct linux_binprm *b
 		__put_user((elf_addr_t)p, argv++);
 		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
-			return;
+			return 0;
 		p += len;
 	}
-	__put_user(0, argv);
+	if (__put_user(0, argv))
+		return -EFAULT;
 	current->mm->arg_end = current->mm->env_start = p;
 	while (envc-- > 0) {
 		size_t len;
 		__put_user((elf_addr_t)p, envp++);
 		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
-			return;
+			return 0;
 		p += len;
 	}
-	__put_user(0, envp);
+	if (__put_user(0, envp))
+		return -EFAULT;
 	current->mm->env_end = p;
 
 	/* Put the elf_info on the stack in the right place.  */
 	sp = (elf_addr_t __user *)envp + 1;
-	copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t));
+	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
+		return -EFAULT;
+	return 0;
 }
 
 #ifndef elf_map
@@ -378,7 +386,7 @@ static unsigned long load_elf_interp(str
 	eppnt = elf_phdata;
 	for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
 	  if (eppnt->p_type == PT_LOAD) {
-	    int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
+	    int elf_type = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECPRIO;
 	    int elf_prot = 0;
 	    unsigned long vaddr = 0;
 	    unsigned long k, map_addr;
@@ -439,7 +447,11 @@ static unsigned long load_elf_interp(str
 	 * that there are zero-mapped pages up to and including the 
 	 * last bss page.
 	 */
-	padzero(elf_bss);
+	if (padzero(elf_bss)) {
+		error = -EFAULT;
+		goto out_close;
+	}
+
 	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);	/* What we have mapped so far */
 
 	/* Map the last of the bss segment */
@@ -641,7 +653,7 @@ static int load_elf_binary(struct linux_
 			    strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
 				ibcs2_interpreter = 1;
 
-			interpreter = open_exec(elf_interpreter);
+			interpreter = open_exec(elf_interpreter, NULL);
 			retval = PTR_ERR(interpreter);
 			if (IS_ERR(interpreter))
 				goto out_free_interp;
@@ -651,7 +663,7 @@ static int load_elf_binary(struct linux_
 			 * mm->dumpable = 0 regardless of the interpreter's
 			 * permissions.
 			 */
-			if (permission(interpreter->f_dentry->d_inode, MAY_READ, NULL) < 0)
+			if (permission(interpreter->f_dentry->d_inode, MAY_READ, NULL, NULL) < 0)
 				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 
 			retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
@@ -827,7 +839,13 @@ static int load_elf_binary(struct linux_
 				nbyte = ELF_MIN_ALIGN - nbyte;
 				if (nbyte > elf_brk - elf_bss)
 					nbyte = elf_brk - elf_bss;
-				clear_user((void __user *) elf_bss + load_bias, nbyte);
+				/*
+				 * This bss-zeroing can fail if the ELF file
+				 * specifies odd protections.  So we don't check
+				 * the return value
+				 */
+				(void)clear_user((void __user *)elf_bss +
+						 load_bias, nbyte);
 			}
 		}
 
@@ -835,7 +853,8 @@ static int load_elf_binary(struct linux_
 		if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
 		if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
 
-		elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
+		elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE|
+			MAP_EXECPRIO;
 
 		vaddr = elf_ppnt->p_vaddr;
 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set)
@@ -850,6 +869,8 @@ static int load_elf_binary(struct linux_
 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags, 0);
 		if (BAD_ADDR(error)) {
 			send_sig(SIGKILL, current, 0);
+			retval = IS_ERR((void *)error) ?
+				PTR_ERR((void*)error) : -EINVAL;
 			goto out_free_dentry;
 		}
 
@@ -877,6 +898,7 @@ static int load_elf_binary(struct linux_
 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
 			/* set_brk can never work.  Avoid overflows.  */
 			send_sig(SIGKILL, current, 0);
+			retval = -EINVAL;
 			goto out_free_dentry;
 		}
 
@@ -911,7 +933,11 @@ static int load_elf_binary(struct linux_
 		send_sig(SIGKILL, current, 0);
 		goto out_free_dentry;
 	}
-	padzero(elf_bss);
+	if (padzero(elf_bss)) {
+		send_sig(SIGSEGV, current, 0);
+		retval = -EFAULT; /* Nobody gets to see this, but.. */
+		goto out_free_dentry;
+	}
 
 	if (elf_interpreter) {
 		if (interpreter_type == INTERPRETER_AOUT)
@@ -953,15 +979,12 @@ static int load_elf_binary(struct linux_
 	 * Map the vsyscall trampoline. This address is then passed via
 	 * AT_SYSINFO.
 	 */
-#ifdef __HAVE_ARCH_VSYSCALL
-	map_vsyscall();
-#endif
 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
-        retval = arch_setup_additional_pages(bprm, executable_stack);
-        if (retval < 0) {
-                send_sig(SIGKILL, current, 0);
-                goto out;
-        }
+	retval = arch_setup_additional_pages(bprm, executable_stack, 0);
+	if (retval < 0) {
+		send_sig(SIGKILL, current, 0);
+		goto out;
+	}
 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 
 	compute_creds(bprm);
@@ -1008,9 +1031,11 @@ static int load_elf_binary(struct linux_
 
 	start_thread(regs, elf_entry, bprm->p);
 	if (unlikely(current->ptrace & PT_PTRACED)) {
-		if (current->ptrace & PT_TRACE_EXEC)
+		if (current->ptrace & PT_TRACE_EXEC) {
+			set_pn_state(current, PN_STOP_EXEC);
 			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
-		else
+			clear_pn_state(current);
+		} else
 			send_sig(SIGTRAP, current, 0);
 	}
 	retval = 0;
@@ -1103,7 +1128,10 @@ static int load_elf_library(struct file 
 		goto out_free_ph;
 
 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
-	padzero(elf_bss);
+	if (padzero(elf_bss)) {
+		error = -EFAULT;
+		goto out_free_ph;
+	}
 
 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1);
 	bss = eppnt->p_memsz + eppnt->p_vaddr;
@@ -1323,10 +1351,10 @@ static void fill_prstatus(struct elf_prs
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
 	prstatus->pr_sighold = p->blocked.sig[0];
-	prstatus->pr_pid = p->pid;
-	prstatus->pr_ppid = p->parent->pid;
-	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = p->signal->session;
+	prstatus->pr_pid = virt_pid(p);
+	prstatus->pr_ppid = virt_pid(p->parent);
+	prstatus->pr_pgrp = virt_pgid(p);
+	prstatus->pr_sid = virt_sid(p);
 	if (p->pid == p->tgid) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1350,7 +1378,7 @@ static void fill_prstatus(struct elf_prs
 	jiffies_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
 }
 
-static void fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
+static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 			struct mm_struct *mm)
 {
 	unsigned int i, len;
@@ -1361,17 +1389,18 @@ static void fill_psinfo(struct elf_prpsi
 	len = mm->arg_end - mm->arg_start;
 	if (len >= ELF_PRARGSZ)
 		len = ELF_PRARGSZ-1;
-	copy_from_user(&psinfo->pr_psargs,
-		       (const char __user *)mm->arg_start, len);
+	if (copy_from_user(&psinfo->pr_psargs,
+		           (const char __user *)mm->arg_start, len))
+		return -EFAULT;
 	for(i = 0; i < len; i++)
 		if (psinfo->pr_psargs[i] == 0)
 			psinfo->pr_psargs[i] = ' ';
 	psinfo->pr_psargs[len] = 0;
 
-	psinfo->pr_pid = p->pid;
-	psinfo->pr_ppid = p->parent->pid;
-	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = p->signal->session;
+	psinfo->pr_pid = virt_pid(p);
+	psinfo->pr_ppid = virt_pid(p->parent);
+	psinfo->pr_pgrp = virt_pgid(p);
+	psinfo->pr_sid = virt_sid(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
@@ -1383,7 +1412,7 @@ static void fill_psinfo(struct elf_prpsi
 	SET_GID(psinfo->pr_gid, p->gid);
 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
 	
-	return;
+	return 0;
 }
 
 /* Here is the structure in which status of each thread is captured. */
@@ -1505,7 +1534,7 @@ static int elf_core_dump(long signr, str
 	if (signr) {
 		struct elf_thread_status *tmp;
 		read_lock(&tasklist_lock);
-		do_each_thread(g,p)
+		do_each_thread_ve(g,p)
 			if (current->mm == p->mm && current != p) {
 				tmp = kmalloc(sizeof(*tmp), GFP_ATOMIC);
 				if (!tmp) {
@@ -1517,7 +1546,7 @@ static int elf_core_dump(long signr, str
 				tmp->thread = p;
 				list_add(&tmp->list, &thread_list);
 			}
-		while_each_thread(g,p);
+		while_each_thread_ve(g,p);
 		read_unlock(&tasklist_lock);
 		list_for_each(t, &thread_list) {
 			struct elf_thread_status *tmp;
diff -Nurap linux-2.6.9-100.orig/fs/binfmt_em86.c linux-2.6.9-ve023stab054/fs/binfmt_em86.c
--- linux-2.6.9-100.orig/fs/binfmt_em86.c	2004-10-19 01:54:37.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/binfmt_em86.c	2011-06-15 19:26:19.000000000 +0400
@@ -82,7 +82,7 @@ static int load_em86(struct linux_binprm
 	 * Note that we use open_exec() as the name is now in kernel
 	 * space, and we don't need to copy it.
 	 */
-	file = open_exec(interp);
+	file = open_exec(interp, bprm);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
diff -Nurap linux-2.6.9-100.orig/fs/binfmt_flat.c linux-2.6.9-ve023stab054/fs/binfmt_flat.c
--- linux-2.6.9-100.orig/fs/binfmt_flat.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/binfmt_flat.c	2011-06-15 19:26:19.000000000 +0400
@@ -775,7 +775,7 @@ static int load_flat_shared_library(int 
 
 	/* Open the file up */
 	bprm.filename = buf;
-	bprm.file = open_exec(bprm.filename);
+	bprm.file = open_exec(bprm.filename, &bprm);
 	res = PTR_ERR(bprm.file);
 	if (IS_ERR(bprm.file))
 		return res;
diff -Nurap linux-2.6.9-100.orig/fs/binfmt_misc.c linux-2.6.9-ve023stab054/fs/binfmt_misc.c
--- linux-2.6.9-100.orig/fs/binfmt_misc.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/binfmt_misc.c	2011-06-15 19:26:19.000000000 +0400
@@ -153,7 +153,8 @@ static int load_misc_binary(struct linux
 
 		/* if the binary is not readable than enforce mm->dumpable=0
 		   regardless of the interpreter's permissions */
-		if (permission(bprm->file->f_dentry->d_inode, MAY_READ, NULL))
+		if (permission(bprm->file->f_dentry->d_inode, MAY_READ,
+					NULL, NULL))
 			bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 
 		allow_write_access(bprm->file);
@@ -182,7 +183,7 @@ static int load_misc_binary(struct linux
 
 	bprm->interp = iname;	/* for binfmt_script */
 
-	interp_file = open_exec (iname);
+	interp_file = open_exec (iname, bprm);
 	retval = PTR_ERR (interp_file);
 	if (IS_ERR (interp_file))
 		goto _error;
diff -Nurap linux-2.6.9-100.orig/fs/binfmt_script.c linux-2.6.9-ve023stab054/fs/binfmt_script.c
--- linux-2.6.9-100.orig/fs/binfmt_script.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/binfmt_script.c	2011-06-15 19:26:19.000000000 +0400
@@ -86,7 +86,7 @@ static int load_script(struct linux_binp
 	/*
 	 * OK, now restart the process with the interpreter's dentry.
 	 */
-	file = open_exec(interp);
+	file = open_exec(interp, bprm);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
diff -Nurap linux-2.6.9-100.orig/fs/block_dev.c linux-2.6.9-ve023stab054/fs/block_dev.c
--- linux-2.6.9-100.orig/fs/block_dev.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/block_dev.c	2011-06-15 19:26:19.000000000 +0400
@@ -652,9 +652,16 @@ static int do_open(struct block_device *
 {
 	struct module *owner = NULL;
 	struct gendisk *disk;
-	int ret = -ENXIO;
+	int ret;
 	int part;
 
+#ifdef CONFIG_VE
+	ret = get_device_perms_ve(S_IFBLK, bdev->bd_dev,
+				  file->f_mode&(FMODE_READ|FMODE_WRITE));
+	if (ret)
+	        return ret;
+#endif
+	ret = -ENXIO;
 	file->f_mapping = bdev->bd_inode->i_mapping;
 	lock_kernel();
 	disk = get_gendisk(bdev->bd_dev, &part);
@@ -921,7 +928,7 @@ EXPORT_SYMBOL(ioctl_by_bdev);
  * namespace if possible and return it.  Return ERR_PTR(error)
  * otherwise.
  */
-struct block_device *lookup_bdev(const char *path)
+struct block_device *lookup_bdev(const char *path, int mode)
 {
 	struct block_device *bdev;
 	struct inode *inode;
@@ -939,6 +946,11 @@ struct block_device *lookup_bdev(const c
 	error = -ENOTBLK;
 	if (!S_ISBLK(inode->i_mode))
 		goto fail;
+#ifdef CONFIG_VE
+	error = get_device_perms_ve(S_IFBLK, inode->i_rdev, mode);
+	if (error)
+		goto fail;
+#endif
 	error = -EACCES;
 	if (nd.mnt->mnt_flags & MNT_NODEV)
 		goto fail;
@@ -970,12 +982,13 @@ struct block_device *open_bdev_excl(cons
 	mode_t mode = FMODE_READ;
 	int error = 0;
 
-	bdev = lookup_bdev(path);
+	if (!(flags & MS_RDONLY))
+		mode |= FMODE_WRITE;
+
+	bdev = lookup_bdev(path, mode);
 	if (IS_ERR(bdev))
 		return bdev;
 
-	if (!(flags & MS_RDONLY))
-		mode |= FMODE_WRITE;
 	error = blkdev_get(bdev, mode, 0);
 	if (error)
 		return ERR_PTR(error);
diff -Nurap linux-2.6.9-100.orig/fs/buffer.c linux-2.6.9-ve023stab054/fs/buffer.c
--- linux-2.6.9-100.orig/fs/buffer.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/buffer.c	2011-06-15 19:26:22.000000000 +0400
@@ -383,7 +383,9 @@ static void do_sync(unsigned long wait)
 
 asmlinkage long sys_sync(void)
 {
-	do_sync(1);
+	if (sysctl_fsync_enable || ve_is_super(get_exec_env())) {
+		do_sync(1);
+	}
 	return 0;
 }
 
@@ -440,6 +442,11 @@ asmlinkage long sys_fsync(unsigned int f
 		goto out_putf;
 	}
 
+	if (!sysctl_fsync_enable && !ve_is_super(get_exec_env())) {
+		ret = 0;
+		goto out_putf;
+	}
+
 	/* We need to protect against concurrent writers.. */
 	down(&mapping->host->i_sem);
 	current->flags |= PF_SYNCWRITE;
@@ -474,6 +481,11 @@ asmlinkage long sys_fdatasync(unsigned i
 	if (!file->f_op || !file->f_op->fsync)
 		goto out_putf;
 
+	if (!sysctl_fsync_enable && !ve_is_super(get_exec_env())) {
+		ret = 0;
+		goto out_putf;
+	}
+
 	mapping = file->f_mapping;
 
 	down(&mapping->host->i_sem);
@@ -2081,7 +2093,8 @@ static int __block_prepare_write(struct 
 			if (err)
 				break;
 			if (buffer_new(bh)) {
-				unmap_underlying_metadata(bh->b_bdev,
+				if (buffer_mapped(bh))
+					unmap_underlying_metadata(bh->b_bdev,
 							bh->b_blocknr);
 				if (PageUptodate(page)) {
 					set_buffer_uptodate(bh);
@@ -2151,6 +2164,7 @@ static int __block_prepare_write(struct 
 			clear_buffer_new(bh);
 			kaddr = kmap_atomic(page, KM_USER0);
 			memset(kaddr+block_start, 0, bh->b_size);
+			flush_dcache_page(page);
 			kunmap_atomic(kaddr, KM_USER0);
 			set_buffer_uptodate(bh);
 			mark_buffer_dirty(bh);
@@ -2626,6 +2640,7 @@ failed:
 	 */
 	kaddr = kmap_atomic(page, KM_USER0);
 	memset(kaddr, 0, PAGE_CACHE_SIZE);
+	flush_dcache_page(page);
 	kunmap_atomic(kaddr, KM_USER0);
 	SetPageUptodate(page);
 	set_page_dirty(page);
@@ -3240,7 +3255,7 @@ void __init buffer_init(void)
 
 	bh_cachep = kmem_cache_create("buffer_head",
 			sizeof(struct buffer_head), 0,
-			SLAB_PANIC, init_buffer_head, NULL);
+			SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_buffer_head, NULL);
 	for (i = 0; i < ARRAY_SIZE(bh_wait_queue_heads); i++)
 		init_waitqueue_head(&bh_wait_queue_heads[i].wqh);
 
@@ -3252,9 +3267,11 @@ void __init buffer_init(void)
 	hotcpu_notifier(buffer_cpu_notify, 0);
 }
 
+extern int invalidate_inodes_and_pages(struct super_block * sb, int verify);
+
 static void drop_pagecache_sb(struct super_block *sb)
 {
-	invalidate_inodes_and_pages(sb);
+	invalidate_inodes_and_pages(sb, 0);
 }
                                                                                                     
 void drop_pagecache(void)
diff -Nurap linux-2.6.9-100.orig/fs/char_dev.c linux-2.6.9-ve023stab054/fs/char_dev.c
--- linux-2.6.9-100.orig/fs/char_dev.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/char_dev.c	2011-06-15 19:26:19.000000000 +0400
@@ -299,6 +299,13 @@ int chrdev_open(struct inode * inode, st
 	struct cdev *new = NULL;
 	int ret = 0;
 
+#ifdef CONFIG_VE
+	ret = get_device_perms_ve(S_IFCHR, inode->i_rdev,
+				  filp->f_mode&(FMODE_READ|FMODE_WRITE));
+	if (ret)
+		return ret;
+#endif
+
 	spin_lock(&cdev_lock);
 	p = inode->i_cdev;
 	if (!p) {
diff -Nurap linux-2.6.9-100.orig/fs/cifs/cifsfs.c linux-2.6.9-ve023stab054/fs/cifs/cifsfs.c
--- linux-2.6.9-100.orig/fs/cifs/cifsfs.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/cifs/cifsfs.c	2011-06-15 19:26:19.000000000 +0400
@@ -238,7 +238,8 @@ cifs_statfs(struct super_block *sb, stru
 				   longer available? */
 }
 
-static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd,
+			   struct exec_perm *exec_perm)
 {
 	struct cifs_sb_info *cifs_sb;
 
@@ -250,7 +251,7 @@ static int cifs_permission(struct inode 
 		on the client (above and beyond ACL on servers) for
 		servers which do not support setting and viewing mode bits,
 		so allowing client to check permissions is useful */
-		return vfs_permission(inode, mask);
+		return vfs_permission(inode, mask, exec_perm);
 }
 
 static kmem_cache_t *cifs_inode_cachep;
diff -Nurap linux-2.6.9-100.orig/fs/coda/dir.c linux-2.6.9-ve023stab054/fs/coda/dir.c
--- linux-2.6.9-100.orig/fs/coda/dir.c	2004-10-19 01:53:21.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/coda/dir.c	2011-06-15 19:26:19.000000000 +0400
@@ -147,7 +147,8 @@ exit:
 }
 
 
-int coda_permission(struct inode *inode, int mask, struct nameidata *nd)
+int coda_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *perm)
 {
         int error = 0;
  
diff -Nurap linux-2.6.9-100.orig/fs/coda/pioctl.c linux-2.6.9-ve023stab054/fs/coda/pioctl.c
--- linux-2.6.9-100.orig/fs/coda/pioctl.c	2004-10-19 01:54:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/coda/pioctl.c	2011-06-15 19:26:19.000000000 +0400
@@ -25,7 +25,7 @@
 
 /* pioctl ops */
 static int coda_ioctl_permission(struct inode *inode, int mask,
-				 struct nameidata *nd);
+				 struct nameidata *nd, struct exec_perm *);
 static int coda_pioctl(struct inode * inode, struct file * filp, 
                        unsigned int cmd, unsigned long user_data);
 
@@ -43,7 +43,8 @@ struct file_operations coda_ioctl_operat
 
 /* the coda pioctl inode ops */
 static int coda_ioctl_permission(struct inode *inode, int mask,
-				 struct nameidata *nd)
+				 struct nameidata *nd,
+				 struct exec_perm *exec_perm)
 {
         return 0;
 }
diff -Nurap linux-2.6.9-100.orig/fs/compat.c linux-2.6.9-ve023stab054/fs/compat.c
--- linux-2.6.9-100.orig/fs/compat.c	2011-06-09 19:22:59.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/compat.c	2011-06-15 19:26:22.000000000 +0400
@@ -25,6 +25,7 @@
 #include <linux/file.h>
 #include <linux/vfs.h>
 #include <linux/ioctl32.h>
+#include <linux/virtinfo.h>
 #include <linux/init.h>
 #include <linux/sockios.h>	/* for SIOCDEVPRIVATE */
 #include <linux/smb.h>
@@ -157,6 +158,8 @@ asmlinkage long compat_sys_statfs(const 
 	if (!error) {
 		struct kstatfs tmp;
 		error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
+		if (!error)
+			error = faudit_statfs(nd.mnt->mnt_sb, &tmp);
 		if (!error && put_compat_statfs(buf, &tmp))
 			error = -EFAULT;
 		path_release(&nd);
@@ -175,6 +178,8 @@ asmlinkage long compat_sys_fstatfs(unsig
 	if (!file)
 		goto out;
 	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp);
+	if (!error)
+		error = faudit_statfs(file->f_vfsmnt->mnt_sb, &tmp);
 	if (!error && put_compat_statfs(buf, &tmp))
 		error = -EFAULT;
 	fput(file);
@@ -218,6 +223,8 @@ asmlinkage long compat_statfs64(const ch
 	if (!error) {
 		struct kstatfs tmp;
 		error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
+		if (!error)
+			error = faudit_statfs(nd.mnt->mnt_sb, &tmp);
 		if (!error && put_compat_statfs64(buf, &tmp))
 			error = -EFAULT;
 		path_release(&nd);
@@ -239,6 +246,8 @@ asmlinkage long compat_fstatfs64(unsigne
 	if (!file)
 		goto out;
 	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp);
+	if (!error)
+		error = faudit_statfs(file->f_vfsmnt->mnt_sb, &tmp);
 	if (!error && put_compat_statfs64(buf, &tmp))
 		error = -EFAULT;
 	fput(file);
@@ -384,6 +393,12 @@ asmlinkage long compat_sys_ioctl(unsigne
 		goto out;
 	}
 
+	if (filp->f_op && filp->f_op->compat_ioctl) {
+		error = filp->f_op->compat_ioctl(filp, cmd, arg);
+		if (error != -ENOIOCTLCMD)
+			goto out;
+	}
+
 	down_read(&ioctl32_sem);
 
 	t = ioctl32_hash_table[ioctl32_hash (cmd)];
@@ -1437,20 +1452,24 @@ int compat_do_execve(char * filename,
 	int retval;
 	int i;
 
-	file = open_exec(filename);
-
-	retval = PTR_ERR(file);
-	if (IS_ERR(file))
+	retval = virtinfo_gencall(VIRTINFO_DOEXECVE, NULL);
+	if (retval)
 		return retval;
 
-	sched_exec();
-
 	retval = -ENOMEM;
 	bprm = kmalloc(sizeof(*bprm), GFP_KERNEL);
 	if (!bprm)
-		goto out_ret;
+		return ERR_PTR(-ENOMEM);
 	memset(bprm, 0, sizeof(*bprm));
 
+	file = open_exec(filename, bprm);
+
+	retval = PTR_ERR(file);
+	if (IS_ERR(file))
+		goto out_open;
+
+	sched_exec();
+
 	bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
 	bprm->file = file;
 	bprm->filename = filename;
@@ -1522,8 +1541,8 @@ out_file:
 		allow_write_access(bprm->file);
 		fput(bprm->file);
 	}
+out_open:
 	kfree(bprm);
-
 out_ret:
 	return retval;
 }
diff -Nurap linux-2.6.9-100.orig/fs/compat_ioctl.c linux-2.6.9-ve023stab054/fs/compat_ioctl.c
--- linux-2.6.9-100.orig/fs/compat_ioctl.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/compat_ioctl.c	2011-06-15 19:26:18.000000000 +0400
@@ -654,8 +654,11 @@ int siocdevprivate_ioctl(unsigned int fd
 	/* Don't check these user accesses, just let that get trapped
 	 * in the ioctl handler instead.
 	 */
-	copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], IFNAMSIZ);
-	__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data);
+	if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
+			 IFNAMSIZ))
+		return -EFAULT;
+	if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
+		return -EFAULT;
 
 	return sys_ioctl(fd, cmd, (unsigned long) u_ifreq64);
 }
@@ -693,6 +696,11 @@ static int dev_ifsioc(unsigned int fd, u
 	set_fs (old_fs);
 	if (!err) {
 		switch (cmd) {
+		/* TUNSETIFF is defined as _IOW, it should be _IORW
+		 * as the data is copied back to user space, but that
+		 * cannot be fixed without breaking all existing apps.
+		 */
+		case TUNSETIFF:
 		case SIOCGIFFLAGS:
 		case SIOCGIFMETRIC:
 		case SIOCGIFMTU:
@@ -2527,7 +2535,8 @@ static int serial_struct_ioctl(unsigned 
         if (cmd == TIOCSSERIAL) {
                 if (verify_area(VERIFY_READ, ss32, sizeof(SS32)))
                         return -EFAULT;
-                __copy_from_user(&ss, ss32, offsetof(SS32, iomem_base));
+                if (__copy_from_user(&ss, ss32, offsetof(SS32, iomem_base)))
+			return -EFAULT;
                 __get_user(udata, &ss32->iomem_base);
                 ss.iomem_base = compat_ptr(udata);
                 __get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift);
@@ -2540,7 +2549,8 @@ static int serial_struct_ioctl(unsigned 
         if (cmd == TIOCGSERIAL && err >= 0) {
                 if (verify_area(VERIFY_WRITE, ss32, sizeof(SS32)))
                         return -EFAULT;
-                __copy_to_user(ss32,&ss,offsetof(SS32,iomem_base));
+                if (__copy_to_user(ss32,&ss,offsetof(SS32,iomem_base)))
+			return -EFAULT;
                 __put_user((unsigned long)ss.iomem_base  >> 32 ?
                             0xffffffff : (unsigned)(unsigned long)ss.iomem_base,
                             &ss32->iomem_base);
diff -Nurap linux-2.6.9-100.orig/fs/dcache.c linux-2.6.9-ve023stab054/fs/dcache.c
--- linux-2.6.9-100.orig/fs/dcache.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/dcache.c	2011-06-15 19:26:22.000000000 +0400
@@ -19,6 +19,7 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/kmem_cache.h>
 #include <linux/init.h>
 #include <linux/smp_lock.h>
 #include <linux/hash.h>
@@ -26,12 +27,17 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/file.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <linux/security.h>
 #include <linux/seqlock.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
 #include <linux/audit.h>
+#include <linux/kernel_stat.h>
+
+#include <ub/ub_dcache.h>
+#include <ub/ub_dcache_op.h>
 
 /* #define DCACHE_DEBUG 1 */
 
@@ -43,9 +49,7 @@ seqlock_t rename_lock __cacheline_aligne
 
 EXPORT_SYMBOL(dcache_lock);
 
-static kmem_cache_t *dentry_cache; 
-
-#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
+kmem_cache_t *dentry_cache; 
 
 /*
  * This is the single most critical data structure when it comes
@@ -72,8 +76,11 @@ static void d_callback(struct rcu_head *
 {
 	struct dentry * dentry = container_of(head, struct dentry, d_rcu);
 
+	ub_dentry_free(dentry);
 	if (dname_external(dentry))
 		kfree(dentry->d_name.name);
+	dentry->d_op = (struct dentry_operations *)0xbaddc;
+	BUG_ON(!d_unhashed(dentry));
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
@@ -89,6 +96,7 @@ static void d_free(struct dentry *dentry
 		kfree(dentry->d_extra_attributes);
 		dentry->d_extra_attributes = NULL;
 	}
+	BUG_ON(!d_unhashed(dentry));
  	call_rcu(&dentry->d_rcu, d_callback);
 }
 
@@ -142,27 +150,22 @@ static inline void dentry_iput(struct de
  * releasing its resources. If the parent dentries were scheduled for release
  * they too may now get deleted.
  *
+ * This traverse upward doesn't change d_inuse of any dentry
+ *
  * no dcache lock, please.
+ * preemption is disabled by the caller.
  */
 
-void dput(struct dentry *dentry)
+void dput_recursive(struct dentry *dentry)
 {
-	if (!dentry)
-		return;
-
 repeat:
-	if (atomic_read(&dentry->d_count) == 1)
-		might_sleep();
 	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
-		return;
+		goto out_preempt;
 
 	spin_lock(&dentry->d_lock);
-	if (atomic_read(&dentry->d_count)) {
-		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_lock);
-		return;
-	}
-			
+	if (atomic_read(&dentry->d_count))
+		goto out_unlock;
+
 	/*
 	 * AV: ->d_delete() is _NOT_ allowed to block now.
 	 */
@@ -176,10 +179,14 @@ repeat:
   	if (list_empty(&dentry->d_lru)) {
   		dentry->d_flags |= DCACHE_REFERENCED;
   		list_add(&dentry->d_lru, &dentry_unused);
+		list_add(&dentry->d_sb_lru, &dentry->d_sb->s_dentry_unused);
   		dentry_stat.nr_unused++;
   	}
+out_unlock:
  	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
+out_preempt:
+	preempt_enable();
 	return;
 
 unhash_it:
@@ -193,21 +200,37 @@ kill_it: {
 		 */
   		if (!list_empty(&dentry->d_lru)) {
   			list_del(&dentry->d_lru);
+  			list_del(&dentry->d_sb_lru);
   			dentry_stat.nr_unused--;
   		}
   		list_del(&dentry->d_child);
 		dentry_stat.nr_dentry--;	/* For d_free, below */
+		preempt_enable_no_resched();
 		/*drops the locks, at that point nobody can reach this dentry */
 		dentry_iput(dentry);
 		parent = dentry->d_parent;
 		d_free(dentry);
 		if (dentry == parent)
 			return;
+		preempt_disable();
 		dentry = parent;
 		goto repeat;
 	}
 }
 
+void dput(struct dentry *dentry)
+{
+	if (!dentry)
+		return;
+
+	if (atomic_read(&dentry->d_count) == 1)
+		might_sleep();
+
+	preempt_disable();
+	ub_dentry_uncharge(dentry);
+	dput_recursive(dentry);
+}
+
 /**
  * d_invalidate - invalidate a dentry
  * @dentry: dentry to invalidate
@@ -273,7 +296,10 @@ static inline struct dentry * __dget_loc
 	if (!list_empty(&dentry->d_lru)) {
 		dentry_stat.nr_unused--;
 		list_del_init(&dentry->d_lru);
+		list_del_init(&dentry->d_sb_lru);
 	}
+
+	ub_dentry_charge_nofail(dentry);
 	return dentry;
 }
 
@@ -346,13 +372,16 @@ restart:
 	tmp = head;
 	while ((tmp = tmp->next) != head) {
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+		spin_lock(&dentry->d_lock);
 		if (!atomic_read(&dentry->d_count)) {
 			__dget_locked(dentry);
 			__d_drop(dentry);
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 			dput(dentry);
 			goto restart;
 		}
+		spin_unlock(&dentry->d_lock);
 	}
 	spin_unlock(&dcache_lock);
 }
@@ -373,8 +402,14 @@ static inline void prune_one_dentry(stru
 	dentry_iput(dentry);
 	parent = dentry->d_parent;
 	d_free(dentry);
-	if (parent != dentry)
-		dput(parent);
+	if (parent != dentry) {
+		/*
+		 * dentry is not in use, only child (not outside)
+		 * references change, so parent->d_inuse does not change
+		 */
+		preempt_disable();
+		dput_recursive(parent);
+	}
 	spin_lock(&dcache_lock);
 }
 
@@ -395,6 +430,10 @@ static inline void prune_one_dentry(stru
  
 static void prune_dcache(int count, struct super_block *sb)
 {
+	struct list_head *lru_head;
+
+	lru_head = sb ? &sb->s_dentry_unused : &dentry_unused;
+
 	spin_lock(&dcache_lock);
 	for (; count ; count--) {
 		struct dentry *dentry;
@@ -403,26 +442,16 @@ static void prune_dcache(int count, stru
 
 		cond_resched_lock(&dcache_lock);
 
-		tmp = dentry_unused.prev;
-		if (unlikely(sb)) {
-			/* Try to find a dentry for this sb, but don't try
-			 * too hard, if they aren't near the tail they will
-			 * be moved down again soon
-			 */
-			int skip = count;
-			while (skip &&
-			       tmp != &dentry_unused &&
-			       list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
-				skip--;
-				tmp = tmp->prev;
-			}
-		}
-		if (tmp == &dentry_unused)
+		tmp = lru_head->prev;
+		if (tmp == lru_head)
 			break;
-		list_del_init(tmp);
-		prefetch(dentry_unused.prev);
- 		dentry_stat.nr_unused--;
-		dentry = list_entry(tmp, struct dentry, d_lru);
+
+		prefetch(lru_head->prev);
+		dentry_stat.nr_unused--;
+		dentry = sb ? list_entry(tmp, struct dentry, d_sb_lru) :
+			      list_entry(tmp, struct dentry, d_lru);
+		list_del_init(&dentry->d_lru);
+		list_del_init(&dentry->d_sb_lru);
 
  		spin_lock(&dentry->d_lock);
 		/*
@@ -438,6 +467,8 @@ static void prune_dcache(int count, stru
 		if (dentry->d_flags & DCACHE_REFERENCED) {
 			dentry->d_flags &= ~DCACHE_REFERENCED;
  			list_add(&dentry->d_lru, &dentry_unused);
+			list_add(&dentry->d_sb_lru,
+					&dentry->d_sb->s_dentry_unused);
  			dentry_stat.nr_unused++;
  			spin_unlock(&dentry->d_lock);
 			continue;
@@ -453,7 +484,8 @@ static void prune_dcache(int count, stru
 		 * If this dentry is for "my" filesystem, then I can prune it
 		 * without taking the s_umount lock (I already hold it).
 		 */
-		if (sb && dentry->d_sb == sb) {
+		if (sb) {
+			BUG_ON(dentry->d_sb != sb);
 			prune_one_dentry(dentry);
 			continue;
 		}
@@ -509,45 +541,22 @@ static void prune_dcache(int count, stru
 
 void shrink_dcache_sb(struct super_block * sb)
 {
-	struct list_head *tmp, *next;
 	struct dentry *dentry;
 
-	/*
-	 * Pass one ... move the dentries for the specified
-	 * superblock to the most recent end of the unused list.
-	 */
 	spin_lock(&dcache_lock);
-	next = dentry_unused.next;
-	while (next != &dentry_unused) {
-		tmp = next;
-		next = tmp->next;
-		dentry = list_entry(tmp, struct dentry, d_lru);
-		if (dentry->d_sb != sb)
-			continue;
-		list_del(tmp);
-		list_add(tmp, &dentry_unused);
-	}
-
-	/*
-	 * Pass two ... free the dentries for this superblock.
-	 */
-repeat:
-	next = dentry_unused.next;
-	while (next != &dentry_unused) {
-		tmp = next;
-		next = tmp->next;
-		dentry = list_entry(tmp, struct dentry, d_lru);
-		if (dentry->d_sb != sb)
-			continue;
+	while (!list_empty(&sb->s_dentry_unused)) {
+		dentry = list_first_entry(&sb->s_dentry_unused,
+				struct dentry, d_sb_lru);
 		dentry_stat.nr_unused--;
-		list_del_init(tmp);
+		list_del_init(&dentry->d_lru);
+		list_del_init(&dentry->d_sb_lru);
 		spin_lock(&dentry->d_lock);
 		if (atomic_read(&dentry->d_count)) {
 			spin_unlock(&dentry->d_lock);
 			continue;
 		}
 		prune_one_dentry(dentry);
-		goto repeat;
+		cond_resched_lock(&dcache_lock);
 	}
 	spin_unlock(&dcache_lock);
 }
@@ -629,16 +638,28 @@ resume:
 		if (!list_empty(&dentry->d_lru)) {
 			dentry_stat.nr_unused--;
 			list_del_init(&dentry->d_lru);
+			list_del_init(&dentry->d_sb_lru);
 		}
 		/* 
 		 * move only zero ref count dentries to the end 
 		 * of the unused list for prune_dcache
 		 */
 		if (!atomic_read(&dentry->d_count)) {
-			list_add(&dentry->d_lru, dentry_unused.prev);
+			list_add_tail(&dentry->d_lru, &dentry_unused);
+			list_add_tail(&dentry->d_sb_lru,
+					&dentry->d_sb->s_dentry_unused);
 			dentry_stat.nr_unused++;
 			found++;
 		}
+
+		/*
+		 * We can return to the caller if we have found some (this
+		 * ensures forward progress). We'll be coming back to find
+		 * the rest.
+		 */
+		if (found && need_resched())
+			goto out;
+
 		/*
 		 * Descend a level if the d_subdirs list is non-empty.
 		 */
@@ -663,6 +684,7 @@ this_parent->d_parent->d_name.name, this
 #endif
 		goto resume;
 	}
+out:
 	spin_unlock(&dcache_lock);
 	return found;
 }
@@ -705,6 +727,7 @@ void shrink_dcache_anon(struct super_blo
 			if (!list_empty(&this->d_lru)) {
 				dentry_stat.nr_unused--;
 				list_del_init(&this->d_lru);
+				list_del_init(&this->d_sb_lru);
 			}
 
 			/* 
@@ -713,6 +736,8 @@ void shrink_dcache_anon(struct super_blo
 			 */
 			if (!atomic_read(&this->d_count)) {
 				list_add_tail(&this->d_lru, &dentry_unused);
+				list_add_tail(&this->d_sb_lru,
+						&this->d_sb->s_dentry_unused);
 				dentry_stat.nr_unused++;
 				found++;
 			}
@@ -736,12 +761,18 @@ void shrink_dcache_anon(struct super_blo
  */
 static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
 {
+	int res = -1;
+
+	KSTAT_PERF_ENTER(shrink_dcache)
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
-			return -1;
+			goto out;
 		prune_dcache(nr, NULL);
 	}
-	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+	res = (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+out:
+	KSTAT_PERF_LEAVE(shrink_dcache)
+	return res;
 }
 
 /**
@@ -759,21 +790,26 @@ struct dentry *d_alloc(struct dentry * p
 	struct dentry *dentry;
 	char *dname;
 
+	dname = NULL;
+	if (name->len > DNAME_INLINE_LEN-1) {
+		dname = kmalloc(name->len + 1, GFP_KERNEL);
+		if (!dname)
+			goto err_name;
+	}
+
+	ub_dentry_alloc_start();
 	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 
 	if (!dentry)
-		return NULL;
+		goto err_dentry;
 
-	if (name->len > DNAME_INLINE_LEN-1) {
-		dname = kmalloc(name->len + 1, GFP_KERNEL);
-		if (!dname) {
-			kmem_cache_free(dentry_cache, dentry); 
-			return NULL;
-		}
-	} else  {
+	preempt_disable();
+	if (dname == NULL)
 		dname = dentry->d_iname;
-	}	
 	dentry->d_name.name = dname;
 
+	if (ub_dentry_alloc(dentry))
+		goto err_charge;
+
 	dentry->d_name.len = name->len;
 	dentry->d_name.hash = name->hash;
 	memcpy(dname, name->name, name->len);
@@ -793,6 +829,7 @@ struct dentry *d_alloc(struct dentry * p
 	dentry->d_bucket = NULL;
 	INIT_HLIST_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
+	INIT_LIST_HEAD(&dentry->d_sb_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
 	INIT_LIST_HEAD(&dentry->d_alias);
 
@@ -804,12 +841,27 @@ struct dentry *d_alloc(struct dentry * p
 	}
 
 	spin_lock(&dcache_lock);
-	if (parent)
+	if (parent) {
 		list_add(&dentry->d_child, &parent->d_subdirs);
+		if (parent->d_flags & DCACHE_VIRTUAL)
+			dentry->d_flags |= DCACHE_VIRTUAL;
+	}
 	dentry_stat.nr_dentry++;
 	spin_unlock(&dcache_lock);
+	preempt_enable();
+	ub_dentry_alloc_end();
 
 	return dentry;
+
+err_charge:
+	preempt_enable();
+	kmem_cache_free(dentry_cache, dentry); 
+err_dentry:
+	if (name->len > DNAME_INLINE_LEN - 1)
+		kfree(dname);
+	ub_dentry_alloc_end();
+err_name:
+	return NULL;
 }
 
 /**
@@ -1088,6 +1140,8 @@ struct dentry * __d_lookup(struct dentry
 			atomic_inc(&dentry->d_count);
 			found = dentry;
 			audit_update_watch(dentry, 0);
+			if (ub_dentry_charge(found))
+				goto charge_failure;
 		}
 terminate:
 		spin_unlock(&dentry->d_lock);
@@ -1098,6 +1152,14 @@ next:
  	rcu_read_unlock();
 
  	return found;
+
+charge_failure:
+	spin_unlock(&found->d_lock);
+	rcu_read_unlock();
+	/* dentry is now unhashed, just kill it */
+	dput(found);
+	/* ... and fail lookup */
+	return NULL;
 }
 
 /**
@@ -1207,6 +1269,7 @@ void d_rehash(struct dentry * entry)
 
 	spin_lock(&dcache_lock);
 	spin_lock(&entry->d_lock);
+	BUG_ON(!(entry->d_flags & DCACHE_UNHASHED));
  	entry->d_flags &= ~DCACHE_UNHASHED;
 	spin_unlock(&entry->d_lock);
 	entry->d_bucket = list;
@@ -1605,6 +1668,32 @@ struct dentry *d_instantiate_unique(stru
 }
 
 /**
+ * __d_path_add_deleted - prepend "(deleted) " text
+ * @end: a pointer to the character after free space at the beginning of the
+ *       buffer
+ * @buflen: remaining free space
+ */
+static inline char * __d_path_add_deleted(char * end, int buflen)
+{
+	buflen -= 10;
+	if (buflen < 0)
+		return ERR_PTR(-ENAMETOOLONG);
+	end -= 10;
+	memcpy(end, "(deleted) ", 10);
+	return end;
+}
+
+/**
+ * d_root_check - checks if dentry is accessible from current's fs root
+ * @dentry: dentry to be verified
+ * @vfsmnt: vfsmnt to which the dentry belongs
+ */
+int d_root_check(struct dentry *dentry, struct vfsmount *vfsmnt)
+{
+	return PTR_ERR(d_path(dentry, vfsmnt, NULL, 0));
+}
+
+/**
  * d_path - return the path of a dentry
  * @dentry: dentry to report
  * @vfsmnt: vfsmnt to which the dentry belongs
@@ -1625,36 +1714,35 @@ char * __d_path( struct dentry *dentry, 
 			char *buffer, int buflen)
 {
 	char * end = buffer+buflen;
-	char * retval;
+	char * retval = NULL;
 	int namelen;
+	int deleted;
+	struct vfsmount *oldvfsmnt;
 
-	*--end = '\0';
-	buflen--;
-	if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
-		buflen -= 10;
-		end -= 10;
-		if (buflen < 0)
+	oldvfsmnt = vfsmnt;
+	deleted = (!IS_ROOT(dentry) && d_unhashed(dentry));
+	if (buffer != NULL) {
+		*--end = '\0';
+		buflen--;
+
+		if (buflen < 1)
 			goto Elong;
-		memcpy(end, " (deleted)", 10);
+		/* Get '/' right */
+		retval = end-1;
+		*retval = '/';
 	}
 
-	if (buflen < 1)
-		goto Elong;
-	/* Get '/' right */
-	retval = end-1;
-	*retval = '/';
-
 	for (;;) {
 		struct dentry * parent;
 
 		if (dentry == root && vfsmnt == rootmnt)
 			break;
 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
-			/* Global root? */
+			/* root of a tree? */
 			spin_lock(&vfsmount_lock);
 			if (vfsmnt->mnt_parent == vfsmnt) {
 				spin_unlock(&vfsmount_lock);
-				goto global_root;
+				goto other_root;
 			}
 			dentry = vfsmnt->mnt_mountpoint;
 			vfsmnt = vfsmnt->mnt_parent;
@@ -1663,27 +1751,51 @@ char * __d_path( struct dentry *dentry, 
 		}
 		parent = dentry->d_parent;
 		prefetch(parent);
+		if (buffer != NULL) {
+			namelen = dentry->d_name.len;
+			buflen -= namelen + 1;
+			if (buflen < 0)
+				goto Elong;
+			end -= namelen;
+			memcpy(end, dentry->d_name.name, namelen);
+			*--end = '/';
+			retval = end;
+		}
+		dentry = parent;
+	}
+	/* the given root point is reached */
+finish:
+	if (buffer != NULL && deleted)
+		retval = __d_path_add_deleted(end, buflen);
+	return retval;
+
+other_root:
+	/*
+	 * We traversed the tree upward and reached a root, but the given
+	 * lookup terminal point wasn't encountered.  It means either that the
+	 * dentry is out of our scope or belongs to an abstract space like
+	 * sock_mnt or pipe_mnt.  Check for it.
+	 *
+	 * There are different options to check it.
+	 * We may assume that any dentry tree is unreachable unless it's
+	 * connected to `root' (defined as fs root of init aka child reaper)
+	 * and expose all paths that are not connected to it.
+	 * The other option is to allow exposing of known abstract spaces
+	 * explicitly and hide the path information for other cases.
+	 * This approach is safer, so let's take it.  2001/04/22  SAW
+	 */
+	if (!(oldvfsmnt->mnt_sb->s_flags & MS_NOUSER))
+		return ERR_PTR(-EINVAL);
+	if (buffer != NULL) {
 		namelen = dentry->d_name.len;
-		buflen -= namelen + 1;
+		buflen -= namelen;
 		if (buflen < 0)
 			goto Elong;
-		end -= namelen;
-		memcpy(end, dentry->d_name.name, namelen);
-		*--end = '/';
-		retval = end;
-		dentry = parent;
+		retval -= namelen-1;	/* hit the slash */
+		memcpy(retval, dentry->d_name.name, namelen);
 	}
+	goto finish;
 
-	return retval;
-
-global_root:
-	namelen = dentry->d_name.len;
-	buflen -= namelen;
-	if (buflen < 0)
-		goto Elong;
-	retval -= namelen-1;	/* hit the slash */
-	memcpy(retval, dentry->d_name.name, namelen);
-	return retval;
 Elong:
 	return ERR_PTR(-ENAMETOOLONG);
 }
@@ -1710,6 +1822,226 @@ char * d_path(struct dentry *dentry, str
 	return res;
 }
 
+#ifdef CONFIG_VE
+#include <net/sock.h>
+#include <linux/ip.h>
+#include <linux/file.h>
+#include <linux/namespace.h>
+#include <linux/vzratelimit.h>
+
+static void mark_sub_tree_virtual(struct dentry *d)
+{
+	struct dentry *orig_root;
+
+	orig_root = d;
+	while (1) {
+		spin_lock(&d->d_lock);
+		d->d_flags |= DCACHE_VIRTUAL;
+		spin_unlock(&d->d_lock);
+
+		if (!list_empty(&d->d_subdirs)) {
+			d = list_entry(d->d_subdirs.next,
+					struct dentry, d_child);
+			continue;
+		}
+		if (d == orig_root)
+			break;
+		while (d == list_entry(d->d_parent->d_subdirs.prev,
+					struct dentry, d_child)) {
+			d = d->d_parent;
+			if (d == orig_root)
+				goto out;
+		}
+		d = list_entry(d->d_child.next,
+				struct dentry, d_child);
+	}
+out:
+	return;
+}
+
+void mark_tree_virtual(struct vfsmount *m, struct dentry *d)
+{
+	struct vfsmount *orig_rootmnt;
+
+	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
+	orig_rootmnt = m;
+	while (1) {
+		mark_sub_tree_virtual(d);
+		if (!list_empty(&m->mnt_mounts)) {
+			m = list_entry(m->mnt_mounts.next,
+					struct vfsmount, mnt_child);
+			d = m->mnt_root;
+			continue;
+		}
+		if (m == orig_rootmnt)
+			break;
+		while (m == list_entry(m->mnt_parent->mnt_mounts.prev,
+					struct vfsmount, mnt_child)) {
+			m = m->mnt_parent;
+			if (m == orig_rootmnt)
+				goto out;
+		}
+		m = list_entry(m->mnt_child.next,
+				struct vfsmount, mnt_child);
+		d = m->mnt_root;
+	}
+out:
+	spin_unlock(&vfsmount_lock);
+	spin_unlock(&dcache_lock);
+}
+EXPORT_SYMBOL(mark_tree_virtual);
+
+static struct vz_rate_info area_ri = { 20, 10*HZ };
+#define VE_AREA_ACC_CHECK	0x0001
+#define VE_AREA_ACC_DENY	0x0002
+#define VE_AREA_EXEC_CHECK	0x0010
+#define VE_AREA_EXEC_DENY	0x0020
+#define VE0_AREA_ACC_CHECK	0x0100
+#define VE0_AREA_ACC_DENY	0x0200
+#define VE0_AREA_EXEC_CHECK	0x1000
+#define VE0_AREA_EXEC_DENY	0x2000
+int ve_area_access_check = 0;
+
+static void print_connection_info(struct task_struct *tsk)
+{
+	struct files_struct *files;
+	int fd;
+
+	files = get_files_struct(tsk);
+	if (!files)
+		return;
+
+	spin_lock(&files->file_lock);
+	for (fd = 0; fd < files->max_fds; fd++) {
+		struct file *file;
+		struct inode *inode;
+		struct socket *socket;
+		struct sock *sk;
+		struct inet_opt *inet;
+
+		file = files->fd[fd];
+		if (file == NULL)
+			continue;
+
+		inode = file->f_dentry->d_inode;
+		if (!inode->i_sock)
+			continue;
+
+		socket = SOCKET_I(inode);
+		if (socket == NULL)
+			continue;
+
+		sk = socket->sk;
+		if (sk->sk_family != PF_INET || sk->sk_type != SOCK_STREAM)
+			continue;
+
+		inet = inet_sk(sk);
+		printk(KERN_ALERT "connection from %u.%u.%u.%u:%u to port %u\n",
+				NIPQUAD(inet->daddr), ntohs(inet->dport),
+				inet->num);
+	}
+	spin_unlock(&files->file_lock);
+	put_files_struct(files);
+}
+
+static void check_alert(struct vfsmount *vfsmnt, struct dentry *dentry,
+		char *str)
+{
+	struct task_struct *tsk;
+	unsigned long page;
+	struct super_block *sb;
+	char *p;
+
+	if (!vz_ratelimit(&area_ri))
+		return;
+
+	tsk = current;
+	p = ERR_PTR(-ENOMEM);
+	page = __get_free_page(GFP_KERNEL);
+	if (page) {
+		spin_lock(&dcache_lock);
+		p = __d_path(dentry, vfsmnt, tsk->fs->root, tsk->fs->rootmnt,
+				(char *)page, PAGE_SIZE);
+		spin_unlock(&dcache_lock);
+	}
+	if (IS_ERR(p))
+		p = "(undefined)";
+
+	sb = dentry->d_sb;
+	printk(KERN_ALERT "%s check alert! file:[%s] from %d/%s, dev%x\n"
+			"Task %d/%d[%s] from VE%d, execenv %d\n",
+			str, p,	VE_OWNER_FSTYPE(sb->s_type)->veid,
+			sb->s_type->name, sb->s_dev,
+			tsk->pid, virt_pid(tsk), tsk->comm,
+			VE_TASK_INFO(tsk)->owner_env->veid,
+			get_exec_env()->veid);
+
+	free_page(page);
+
+	print_connection_info(tsk);
+
+	read_lock(&tasklist_lock);
+	tsk = tsk->real_parent;
+	get_task_struct(tsk);
+	read_unlock(&tasklist_lock);
+
+	printk(KERN_ALERT "Parent %d/%d[%s] from VE%d\n",
+			tsk->pid, virt_pid(tsk), tsk->comm,
+			VE_TASK_INFO(tsk)->owner_env->veid);
+
+	print_connection_info(tsk);
+	put_task_struct(tsk);
+	dump_stack();
+}
+#endif
+
+int check_area_access_ve(struct dentry *dentry, struct vfsmount *mnt)
+{
+#ifdef CONFIG_VE
+	int check, alert, deny;
+
+	if (ve_is_super(get_exec_env())) {
+		check = ve_area_access_check & VE0_AREA_ACC_CHECK;
+		alert = dentry->d_flags & DCACHE_VIRTUAL;
+		deny = ve_area_access_check & VE0_AREA_ACC_DENY;
+	} else {
+		check = ve_area_access_check & VE_AREA_ACC_CHECK;
+		alert = !(dentry->d_flags & DCACHE_VIRTUAL);
+		deny = ve_area_access_check & VE_AREA_ACC_DENY;
+	}
+
+	if (check && alert)
+		check_alert(mnt, dentry, "Access");
+	if (deny && alert)
+		return -EACCES;
+#endif
+	return 0;
+}
+
+int check_area_execute_ve(struct dentry *dentry, struct vfsmount *mnt)
+{
+#ifdef CONFIG_VE
+	int check, alert, deny;
+
+	if (ve_is_super(get_exec_env())) {
+		check = ve_area_access_check & VE0_AREA_EXEC_CHECK;
+		alert = dentry->d_flags & DCACHE_VIRTUAL;
+		deny = ve_area_access_check & VE0_AREA_EXEC_DENY;
+	} else {
+		check = ve_area_access_check & VE_AREA_EXEC_CHECK;
+		alert = !(dentry->d_flags & DCACHE_VIRTUAL);
+		deny = ve_area_access_check & VE_AREA_EXEC_DENY;
+	}
+
+	if (check && alert)
+		check_alert(mnt, dentry, "Exec");
+	if (deny && alert)
+		return -EACCES;
+#endif
+	return 0;
+}
+
 /*
  * NOTE! The user-level library version returns a
  * character pointer. The kernel system call just
@@ -1846,10 +2178,12 @@ resume:
 			goto repeat;
 		}
 		atomic_dec(&dentry->d_count);
+		ub_dentry_uncharge_locked(dentry);
 	}
 	if (this_parent != root) {
 		next = this_parent->d_child.next; 
 		atomic_dec(&this_parent->d_count);
+		ub_dentry_uncharge_locked(this_parent);
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
diff -Nurap linux-2.6.9-100.orig/fs/dcookies.c linux-2.6.9-ve023stab054/fs/dcookies.c
--- linux-2.6.9-100.orig/fs/dcookies.c	2004-10-19 01:53:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/dcookies.c	2011-06-15 19:26:19.000000000 +0400
@@ -93,12 +93,10 @@ static struct dcookie_struct * alloc_dco
 	if (!dcs)
 		return NULL;
 
-	atomic_inc(&dentry->d_count);
-	atomic_inc(&vfsmnt->mnt_count);
 	dentry->d_cookie = dcs;
 
-	dcs->dentry = dentry;
-	dcs->vfsmnt = vfsmnt;
+	dcs->dentry = dget(dentry);
+	dcs->vfsmnt = mntget(vfsmnt);
 	hash_dcookie(dcs);
 
 	return dcs;
diff -Nurap linux-2.6.9-100.orig/fs/devpts/inode.c linux-2.6.9-ve023stab054/fs/devpts/inode.c
--- linux-2.6.9-100.orig/fs/devpts/inode.c	2011-06-09 19:22:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/devpts/inode.c	2011-06-15 19:26:19.000000000 +0400
@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/ve.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/namei.h>
@@ -43,13 +44,29 @@ struct inode_operations devpts_file_inod
 static struct vfsmount *devpts_mnt;
 static struct dentry *devpts_root;
 
-static struct {
-	int setuid;
-	int setgid;
-	uid_t   uid;
-	gid_t   gid;
-	umode_t mode;
-} config = {.mode = 0600};
+void prepare_devpts(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->devpts_mnt = devpts_mnt;
+	devpts_mnt = (struct vfsmount *)0x11121314;
+
+	/* ve0.devpts_root should be filled inside fill_super() */
+	BUG_ON(devpts_root != NULL);
+	devpts_root = (struct dentry *)0x12131415;
+#endif
+}
+
+#ifndef CONFIG_VE
+#define visible_devpts_mnt	devpts_mnt
+#define visible_devpts_root	devpts_root
+#define visible_devpts_config	config
+#else
+#define visible_devpts_mnt	(get_exec_env()->devpts_mnt)
+#define visible_devpts_root	(get_exec_env()->devpts_root)
+#define visible_devpts_config	(*(get_exec_env()->devpts_config))
+#endif
+
+static struct devpts_config config = {.mode = 0600};
 
 static int devpts_remount(struct super_block *sb, int *flags, char *data)
 {
@@ -75,15 +92,16 @@ static int devpts_remount(struct super_b
 		} else if (sscanf(this_char, "mode=%o%c", &n, &dummy) == 1)
 			mode = n & ~S_IFMT;
 		else {
-			printk("devpts: called with bogus options\n");
+			ve_printk(VE_LOG,
+					"devpts: called with bogus options\n");
 			return -EINVAL;
 		}
 	}
-	config.setuid  = setuid;
-	config.setgid  = setgid;
-	config.uid     = uid;
-	config.gid     = gid;
-	config.mode    = mode;
+	visible_devpts_config.setuid  = setuid;
+	visible_devpts_config.setgid  = setgid;
+	visible_devpts_config.uid     = uid;
+	visible_devpts_config.gid     = gid;
+	visible_devpts_config.mode    = mode;
 
 	return 0;
 }
@@ -117,10 +135,10 @@ devpts_fill_super(struct super_block *s,
 	inode->i_fop = &simple_dir_operations;
 	inode->i_nlink = 2;
 
-	devpts_root = s->s_root = d_alloc_root(inode);
+	visible_devpts_root = s->s_root = d_alloc_root(inode);
 	if (s->s_root)
 		return 0;
-	
+
 	printk("devpts: get root dentry failed\n");
 	iput(inode);
 fail:
@@ -133,13 +151,15 @@ static struct super_block *devpts_get_sb
 	return get_sb_single(fs_type, flags, data, devpts_fill_super);
 }
 
-static struct file_system_type devpts_fs_type = {
+struct file_system_type devpts_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "devpts",
 	.get_sb		= devpts_get_sb,
 	.kill_sb	= kill_anon_super,
 };
 
+EXPORT_SYMBOL(devpts_fs_type);
+
 /*
  * The normal naming convention is simply /dev/pts/<number>; this conforms
  * to the System V naming convention
@@ -148,7 +168,7 @@ static struct file_system_type devpts_fs
 static struct dentry *get_node(int num)
 {
 	char s[12];
-	struct dentry *root = devpts_root;
+	struct dentry *root = visible_devpts_root;
 	down(&root->d_inode->i_sem);
 	return lookup_one_len(s, root, sprintf(s, "%d", num));
 }
@@ -159,7 +179,7 @@ int devpts_pty_new(struct tty_struct *tt
 	struct tty_driver *driver = tty->driver;
 	dev_t device = MKDEV(driver->major, driver->minor_start+number);
 	struct dentry *dentry;
-	struct inode *inode = new_inode(devpts_mnt->mnt_sb);
+	struct inode *inode = new_inode(visible_devpts_mnt->mnt_sb);
 
 	/* We're supposed to be given the slave end of a pty */
 	BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY);
@@ -170,10 +190,12 @@ int devpts_pty_new(struct tty_struct *tt
 
 	inode->i_ino = number+2;
 	inode->i_blksize = 1024;
-	inode->i_uid = config.setuid ? config.uid : current->fsuid;
-	inode->i_gid = config.setgid ? config.gid : current->fsgid;
+	inode->i_uid = visible_devpts_config.setuid ?
+		visible_devpts_config.uid : current->fsuid;
+	inode->i_gid = visible_devpts_config.setgid ?
+		visible_devpts_config.gid : current->fsgid;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-	init_special_inode(inode, S_IFCHR|config.mode, device);
+	init_special_inode(inode, S_IFCHR|visible_devpts_config.mode, device);
 	inode->i_op = &devpts_file_inode_operations;
 	inode->u.generic_ip = tty;
 
@@ -181,7 +203,7 @@ int devpts_pty_new(struct tty_struct *tt
 	if (!IS_ERR(dentry) && !dentry->d_inode)
 		d_instantiate(dentry, inode);
 
-	up(&devpts_root->d_inode->i_sem);
+	up(&visible_devpts_root->d_inode->i_sem);
 
 	return 0;
 }
@@ -198,7 +220,7 @@ struct tty_struct *devpts_get_tty(int nu
 		dput(dentry);
 	}
 
-	up(&devpts_root->d_inode->i_sem);
+	up(&visible_devpts_root->d_inode->i_sem);
 
 	return tty;
 }
@@ -216,22 +238,28 @@ void devpts_pty_kill(int number)
 		}
 		dput(dentry);
 	}
-	up(&devpts_root->d_inode->i_sem);
+	up(&visible_devpts_root->d_inode->i_sem);
 }
 
 static int __init init_devpts_fs(void)
 {
-	int err = register_filesystem(&devpts_fs_type);
+	int err;
+#ifdef CONFIG_VE
+	get_ve0()->devpts_config = &config;
+#endif
+	err = register_filesystem(&devpts_fs_type);
 	if (!err) {
 		devpts_mnt = kern_mount(&devpts_fs_type);
 		if (IS_ERR(devpts_mnt))
 			err = PTR_ERR(devpts_mnt);
 	}
+	prepare_devpts();
 	return err;
 }
 
 static void __exit exit_devpts_fs(void)
 {
+	/* the code is never called; the argument is irrelevant */
 	unregister_filesystem(&devpts_fs_type);
 	mntput(devpts_mnt);
 }
diff -Nurap linux-2.6.9-100.orig/fs/dquot.c linux-2.6.9-ve023stab054/fs/dquot.c
--- linux-2.6.9-100.orig/fs/dquot.c	2011-06-09 19:22:48.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/dquot.c	2011-06-15 19:26:20.000000000 +0400
@@ -152,7 +152,9 @@ static struct quota_format_type *find_qu
 	struct quota_format_type *actqf;
 
 	spin_lock(&dq_list_lock);
-	for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next);
+	for (actqf = quota_formats;
+		 actqf && (actqf->qf_fmt_id != id || actqf->qf_ops == NULL);
+						 actqf = actqf->qf_next);
 	if (!actqf || !try_module_get(actqf->qf_owner)) {
 		int qm;
 
diff -Nurap linux-2.6.9-100.orig/fs/eventpoll.c linux-2.6.9-ve023stab054/fs/eventpoll.c
--- linux-2.6.9-100.orig/fs/eventpoll.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/eventpoll.c	2011-06-15 19:26:21.000000000 +0400
@@ -155,11 +155,6 @@
 #define EP_OP_HASH_EVENT(op) ((op) != EPOLL_CTL_DEL)
 
 
-struct epoll_filefd {
-	struct file *file;
-	int fd;
-};
-
 /*
  * Node that is linked into the "wake_task_list" member of the "struct poll_safewake".
  * It is used to keep track on all tasks that are currently inside the wake_up() code
@@ -182,36 +177,6 @@ struct poll_safewake {
 	spinlock_t lock;
 };
 
-/*
- * This structure is stored inside the "private_data" member of the file
- * structure and rapresent the main data sructure for the eventpoll
- * interface.
- */
-struct eventpoll {
-	/* Protect the this structure access */
-	rwlock_t lock;
-
-	/*
-	 * This semaphore is used to ensure that files are not removed
-	 * while epoll is using them. This is read-held during the event
-	 * collection loop and it is write-held during the file cleanup
-	 * path, the epoll file exit code and the ctl operations.
-	 */
-	struct rw_semaphore sem;
-
-	/* Wait queue used by sys_epoll_wait() */
-	wait_queue_head_t wq;
-
-	/* Wait queue used by file->poll() */
-	wait_queue_head_t poll_wait;
-
-	/* List of ready file descriptors */
-	struct list_head rdllist;
-
-	/* RB-Tree root used to store monitored fd structs */
-	struct rb_root rbr;
-};
-
 /* Wait structure used by the poll hooks */
 struct eppoll_entry {
 	/* List header used to link this structure to the "struct epitem" */
@@ -230,50 +195,6 @@ struct eppoll_entry {
 	wait_queue_head_t *whead;
 };
 
-/*
- * Each file descriptor added to the eventpoll interface will
- * have an entry of this type linked to the hash.
- */
-struct epitem {
-	/* RB-Tree node used to link this structure to the eventpoll rb-tree */
-	struct rb_node rbn;
-
-	/* List header used to link this structure to the eventpoll ready list */
-	struct list_head rdllink;
-
-	/* The file descriptor information this item refers to */
-	struct epoll_filefd ffd;
-
-	/* Number of active wait queue attached to poll operations */
-	int nwait;
-
-	/* List containing poll wait queues */
-	struct list_head pwqlist;
-
-	/* The "container" of this item */
-	struct eventpoll *ep;
-
-	/* The structure that describe the interested events and the source fd */
-	struct epoll_event event;
-
-	/*
-	 * Used to keep track of the usage count of the structure. This avoids
-	 * that the structure will desappear from underneath our processing.
-	 */
-	atomic_t usecnt;
-
-	/* List header used to link this item to the "struct file" items list */
-	struct list_head fllink;
-
-	/* List header used to link the item to the transfer list */
-	struct list_head txlink;
-
-	/*
-	 * This is used during the collection/transfer of events to userspace
-	 * to pin items empty events set.
-	 */
-	unsigned int revents;
-};
 
 /* Wrapper struct used by poll queueing */
 struct ep_pqueue {
@@ -288,13 +209,13 @@ static void ep_poll_safewake(struct poll
 static int ep_getfd(int *efd, struct inode **einode, struct file **efile);
 static int ep_file_init(struct file *file);
 static void ep_free(struct eventpoll *ep);
-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
+struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
 static void ep_use_epitem(struct epitem *epi);
-static void ep_release_epitem(struct epitem *epi);
+void ep_release_epitem(struct epitem *epi);
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
 				 poll_table *pt);
 static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi);
-static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 		     struct file *tfile, int fd);
 static int ep_modify(struct eventpoll *ep, struct epitem *epi,
 		     struct epoll_event *event);
@@ -324,6 +245,7 @@ static struct super_block *eventpollfs_g
  * This semaphore is used to serialize ep_free() and eventpoll_release_file().
  */
 struct semaphore epsem;
+EXPORT_SYMBOL(epsem);
 
 /* Safe wake up implementation */
 static struct poll_safewake psw;
@@ -338,10 +260,11 @@ static kmem_cache_t *pwq_cache;
 static struct vfsmount *eventpoll_mnt;
 
 /* File callbacks that implement the eventpoll file behaviour */
-static struct file_operations eventpoll_fops = {
+struct file_operations eventpoll_fops = {
 	.release	= ep_eventpoll_close,
 	.poll		= ep_eventpoll_poll
 };
+EXPORT_SYMBOL(eventpoll_fops);
 
 /*
  * This is used to register the virtual file system from where
@@ -516,6 +439,7 @@ eexit_1:
 		     current, size, error));
 	return error;
 }
+EXPORT_SYMBOL(sys_epoll_create);
 
 
 /*
@@ -824,7 +748,7 @@ static void ep_free(struct eventpoll *ep
  * the returned item, so the caller must call ep_release_epitem()
  * after finished using the "struct epitem".
  */
-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
+struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 {
 	int kcmp;
 	unsigned long flags;
@@ -854,6 +778,7 @@ static struct epitem *ep_find(struct eve
 
 	return epir;
 }
+EXPORT_SYMBOL(ep_find);
 
 
 /*
@@ -872,12 +797,13 @@ static void ep_use_epitem(struct epitem 
  * has finished using the structure. It might lead to freeing the
  * structure itself if the count goes to zero.
  */
-static void ep_release_epitem(struct epitem *epi)
+void ep_release_epitem(struct epitem *epi)
 {
 
 	if (atomic_dec_and_test(&epi->usecnt))
 		EPI_MEM_FREE(epi);
 }
+EXPORT_SYMBOL(ep_release_epitem);
 
 
 /*
@@ -924,7 +850,7 @@ static void ep_rbtree_insert(struct even
 }
 
 
-static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 		     struct file *tfile, int fd)
 {
 	int error, revents, pwake = 0;
@@ -1016,6 +942,7 @@ eexit_2:
 eexit_1:
 	return error;
 }
+EXPORT_SYMBOL(ep_insert);
 
 
 /*
diff -Nurap linux-2.6.9-100.orig/fs/exec.c linux-2.6.9-ve023stab054/fs/exec.c
--- linux-2.6.9-100.orig/fs/exec.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/exec.c	2011-06-15 19:26:22.000000000 +0400
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/mman.h>
+#include <linux/virtinfo.h>
 #include <linux/a.out.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
@@ -52,6 +53,8 @@
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 
+#include <ub/ub_vmpages.h>
+
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
@@ -59,6 +62,7 @@
 int core_uses_pid;
 char core_pattern[65] = "core";
 int suid_dumpable = 0;
+int sysctl_at_vsyscall = 1;
 
 EXPORT_SYMBOL(suid_dumpable);
 
@@ -136,7 +140,7 @@ asmlinkage long sys_uselib(const char __
 	if (!S_ISREG(nd.dentry->d_inode->i_mode))
 		goto exit;
 
-	error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC, &nd);
+	error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC, &nd, NULL);
 	if (error)
 		goto exit;
 
@@ -305,10 +309,14 @@ void install_arg_page(struct vm_area_str
 			struct page *page, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct page_beancounter *pbc;
 	pgd_t * pgd;
 	pmd_t * pmd;
 	pte_t * pte;
 
+	if (pb_alloc(&pbc))
+		return;
+
 	if (unlikely(anon_vma_prepare(vma)))
 		goto out_sig;
 
@@ -330,6 +338,10 @@ void install_arg_page(struct vm_area_str
 	lru_cache_add_active(page);
 	set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(
 					page, vma->vm_page_prot))));
+
+	ub_unused_privvm_dec(mm_ub(mm), 1, vma);
+	pb_add_ref(page, mm_ub(mm), &pbc);
+
 	page_add_anon_rmap(page, vma, address);
 	pte_unmap(pte);
 	spin_unlock(&mm->page_table_lock);
@@ -341,6 +353,31 @@ out:
 out_sig:
 	__free_page(page);
 	force_sig(SIGKILL, current);
+	pb_free(&pbc);
+}
+
+static inline void get_stack_vma_params(struct mm_struct *mm, int exec_stack,
+		unsigned long stack_base, struct linux_binprm *bprm,
+		unsigned long *start, unsigned long *end, unsigned long *flags)
+{
+#ifdef CONFIG_STACK_GROWSUP
+	*start = stack_base;
+	*end = PAGE_MASK &
+		(PAGE_SIZE - 1 + (unsigned long) bprm->p);
+#else
+	*start = PAGE_MASK & (unsigned long) bprm->p;
+	*end = STACK_TOP;
+#endif
+	/* Adjust stack execute permissions; explicitly enable
+	 * for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X
+	 * and leave alone (arch default) otherwise. */
+	if (unlikely(exec_stack == EXSTACK_ENABLE_X))
+		*flags = VM_STACK_FLAGS |  VM_EXEC;
+	else if (exec_stack == EXSTACK_DISABLE_X)
+		*flags = VM_STACK_FLAGS & ~VM_EXEC;
+	else
+		*flags = VM_STACK_FLAGS;
+	*flags |= mm->def_flags;
 }
 
 int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
@@ -351,6 +388,10 @@ int setup_arg_pages(struct linux_binprm 
 	int i, ret;
 	long arg_size;
 
+	unsigned long vm_start;
+	unsigned long vm_end;
+	unsigned long vm_flags;	
+
 #ifdef CONFIG_STACK_GROWSUP
 	/* Move the argument and environment strings to the bottom of the
 	 * stack space.
@@ -411,39 +452,28 @@ int setup_arg_pages(struct linux_binprm 
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
-	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	get_stack_vma_params(mm, executable_stack, stack_base, bprm,
+			&vm_start, &vm_end, &vm_flags);
+
+	ret = -ENOMEM;
+	if (ub_memory_charge(mm_ub(mm), vm_end - vm_start, vm_flags,
+				NULL, UB_SOFT))
+		goto out;
+	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL | __GFP_SOFT_UBC);
 	if (!mpnt)
-		return -ENOMEM;
+		goto out_uncharge;
 
 	memset(mpnt, 0, sizeof(*mpnt));
 
 	down_write(&mm->mmap_sem);
 	{
 		mpnt->vm_mm = mm;
-#ifdef CONFIG_STACK_GROWSUP
-		mpnt->vm_start = stack_base;
-		mpnt->vm_end = PAGE_MASK &
-			(PAGE_SIZE - 1 + (unsigned long) bprm->p);
-#else
-		mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
-		mpnt->vm_end = STACK_TOP;
-#endif
-		/* Adjust stack execute permissions; explicitly enable
-		 * for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X
-		 * and leave alone (arch default) otherwise. */
-		if (unlikely(executable_stack == EXSTACK_ENABLE_X))
-			mpnt->vm_flags = VM_STACK_FLAGS |  VM_EXEC;
-		else if (executable_stack == EXSTACK_DISABLE_X)
-			mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
-		else
-			mpnt->vm_flags = VM_STACK_FLAGS;
-		mpnt->vm_flags |= mm->def_flags;
+		mpnt->vm_start = vm_start;
+		mpnt->vm_end = vm_end;
+		mpnt->vm_flags = vm_flags;
 		mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7];
-		if ((ret = insert_vm_struct(mm, mpnt))) {
-			up_write(&mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, mpnt);
-			return ret;
-		}
+		if ((ret = insert_vm_struct(mm, mpnt)))
+			goto out_up_free;
 		mm->stack_vm = mm->total_vm = vma_pages(mpnt);
 	}
 
@@ -458,6 +488,14 @@ int setup_arg_pages(struct linux_binprm 
 	up_write(&mm->mmap_sem);
 	
 	return 0;
+
+out_up_free:
+	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, mpnt);
+out_uncharge:
+	ub_memory_uncharge(mm_ub(mm), vm_end - vm_start, vm_flags, NULL);
+out:
+	return ret;
 }
 
 EXPORT_SYMBOL(setup_arg_pages);
@@ -479,7 +517,7 @@ static inline void free_arg_pages(struct
 
 #endif /* CONFIG_MMU */
 
-struct file *open_exec(const char *name)
+struct file *open_exec(const char *name, struct linux_binprm *bprm)
 {
 	struct nameidata nd;
 	int err;
@@ -494,7 +532,13 @@ struct file *open_exec(const char *name)
 		file = ERR_PTR(-EACCES);
 		if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
 		    S_ISREG(inode->i_mode)) {
-			int err = permission(inode, MAY_EXEC, &nd);
+			int err;
+			if (bprm != NULL) {
+				bprm->perm.set = 0;
+				err = permission(inode, MAY_EXEC, &nd,
+						&bprm->perm);
+			} else
+				err = permission(inode, MAY_EXEC, &nd, NULL);
 			file = ERR_PTR(err);
 			if (!err) {
 				file = dentry_open(nd.dentry, nd.mnt, 
@@ -536,16 +580,11 @@ int kernel_read(struct file *file, unsig
 
 EXPORT_SYMBOL(kernel_read);
 
-static int exec_mmap(struct mm_struct *mm)
+static int exec_mmap(struct linux_binprm *bprm)
 {
 	struct task_struct *tsk;
-	struct mm_struct * old_mm, *active_mm;
-
-	/* Add it to the list of mm's */
-	spin_lock(&mmlist_lock);
-	list_add(&mm->mmlist, &init_mm.mmlist);
-	mmlist_nr++;
-	spin_unlock(&mmlist_lock);
+	struct mm_struct *mm, *old_mm, *active_mm;
+	int ret;
 
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
@@ -567,21 +606,40 @@ static int exec_mmap(struct mm_struct *m
 			return -EINTR;
 		}
 	}
+
+	ret = 0;
 	task_lock(tsk);
+	mm = bprm->mm;
 	active_mm = tsk->active_mm;
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	activate_mm(active_mm, mm);
 	task_unlock(tsk);
 	arch_pick_mmap_layout(mm);
+
+	/* Add it to the list of mm's */
+	spin_lock(&mmlist_lock);
+	list_add(&mm->mmlist, &init_mm.mmlist);
+	mmlist_nr++;
+	spin_unlock(&mmlist_lock);
+	bprm->mm = NULL;		/* We're using it now */
+
+#ifdef CONFIG_VZ_GENCALLS
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_EXECMMAP,
+				bprm) & NOTIFY_FAIL) {
+		/* similar to binfmt_elf */
+		send_sig(SIGKILL, current, 0);
+		ret = -ENOMEM;
+	}
+#endif
 	if (old_mm) {
 		up_read(&old_mm->mmap_sem);
 		if (active_mm != old_mm) BUG();
 		mmput(old_mm);
-		return 0;
+		return ret;
 	}
 	mmdrop(active_mm);
-	return 0;
+	return ret;
 }
 
 /*
@@ -592,17 +650,21 @@ static int exec_mmap(struct mm_struct *m
  */
 static inline int de_thread(struct task_struct *tsk)
 {
-	struct signal_struct *newsig, *oldsig = tsk->signal;
+	struct signal_struct *sig = tsk->signal;
 	struct sighand_struct *newsighand, *oldsighand = tsk->sighand;
 	spinlock_t *lock = &oldsighand->siglock;
+	struct task_struct *leader = NULL;
 	int count;
 
 	/*
 	 * If we don't share sighandlers, then we aren't sharing anything
 	 * and we can just re-use it all.
 	 */
-	if (atomic_read(&oldsighand->count) <= 1)
+	if (atomic_read(&oldsighand->count) <= 1) {
+		BUG_ON(atomic_read(&sig->count) != 1);
+		exit_itimers(sig);
 		return 0;
+	}
 
 	newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
 	if (!newsighand)
@@ -612,56 +674,6 @@ static inline int de_thread(struct task_
 	atomic_set(&newsighand->count, 1);
 	memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action));
 
-	/*
-	 * See if we need to allocate a new signal structure
-	 */
-	newsig = NULL;
-	if (atomic_read(&oldsig->count) > 1) {
-		newsig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
-		if (!newsig) {
-			kmem_cache_free(sighand_cachep, newsighand);
-			return -ENOMEM;
-		}
-		atomic_set(&newsig->count, 1);
-		newsig->curr_target = NULL;
-		init_sigpending(&newsig->shared_pending);
-		newsig->group_exit = 0;
-		newsig->group_exit_code = 0;
-		newsig->group_exit_task = NULL;
-		newsig->notify_count = 0;
-		newsig->group_stop_count = 0;
-		newsig->stop_state = 0;
-		INIT_LIST_HEAD(&newsig->posix_timers);
-
-		newsig->pgrp = oldsig->pgrp;
-		newsig->tty_old_pgrp = oldsig->tty_old_pgrp;
-		newsig->session = oldsig->session;
-		newsig->leader = oldsig->leader;
-		newsig->tty = oldsig->tty;
-
-		newsig->utime = oldsig->utime;
-		newsig->stime = oldsig->stime;
-		newsig->cutime = oldsig->cutime;
-		newsig->cstime = oldsig->cstime;
-		newsig->nvcsw = oldsig->nvcsw;
-		newsig->nivcsw = oldsig->nivcsw;
-		newsig->cnvcsw = oldsig->cnvcsw;
-		newsig->cnivcsw = oldsig->cnivcsw;
-		newsig->min_flt = oldsig->min_flt;
-		newsig->maj_flt = oldsig->maj_flt;
-		newsig->cmin_flt = oldsig->cmin_flt;
-		newsig->cmaj_flt = oldsig->cmaj_flt;
-#ifdef CONFIG_KEYS
-		rcu_read_lock();
-		newsig->session_keyring =
-			key_get(rcu_dereference(oldsig->session_keyring));
-		rcu_read_unlock();
-
-		newsig->process_keyring = key_get(oldsig->process_keyring);
-#endif
-		atomic_set(&newsig->live, 1);
-	}
-
 	if (thread_group_empty(current))
 		goto no_thread_group;
 
@@ -671,7 +683,7 @@ static inline int de_thread(struct task_
 	 */
 	read_lock(&tasklist_lock);
 	spin_lock_irq(lock);
-	if (oldsig->group_exit) {
+	if (sig->group_exit) {
 		/*
 		 * Another group action in progress, just
 		 * return so that the signal is processed.
@@ -679,11 +691,9 @@ static inline int de_thread(struct task_
 		spin_unlock_irq(lock);
 		read_unlock(&tasklist_lock);
 		kmem_cache_free(sighand_cachep, newsighand);
-		if (newsig)
-			kmem_cache_free(signal_cachep, newsig);
 		return -EAGAIN;
 	}
-	oldsig->group_exit = 1;
+	sig->group_exit = 1;
 	zap_other_threads(current);
 	read_unlock(&tasklist_lock);
 
@@ -693,14 +703,16 @@ static inline int de_thread(struct task_
 	count = 2;
 	if (current->pid == current->tgid)
 		count = 1;
-	while (atomic_read(&oldsig->count) > count) {
-		oldsig->group_exit_task = current;
-		oldsig->notify_count = count;
+	while (atomic_read(&sig->count) > count) {
+		sig->group_exit_task = current;
+		sig->notify_count = count;
 		__set_current_state(TASK_UNINTERRUPTIBLE);
 		spin_unlock_irq(lock);
 		schedule();
 		spin_lock_irq(lock);
 	}
+	sig->group_exit_task = NULL;
+	sig->notify_count = 0;
 	spin_unlock_irq(lock);
 
 	/*
@@ -709,8 +721,8 @@ static inline int de_thread(struct task_
 	 * and to assume its PID:
 	 */
 	if (current->pid != current->tgid) {
-		struct task_struct *leader = current->group_leader, *parent;
-		struct dentry *proc_dentry1, *proc_dentry2;
+		struct task_struct *parent;
+		struct dentry *proc_dentry1[2], *proc_dentry2[2];
 		unsigned long ptrace;
 
 		/*
@@ -718,13 +730,14 @@ static inline int de_thread(struct task_
 		 * It should already be zombie at this point, most
 		 * of the time.
 		 */
+		leader = current->group_leader;
 		while (leader->exit_state != EXIT_ZOMBIE)
 			yield();
 
 		spin_lock(&leader->proc_lock);
 		spin_lock(&current->proc_lock);
-		proc_dentry1 = proc_pid_unhash(current);
-		proc_dentry2 = proc_pid_unhash(leader);
+		proc_pid_unhash(current, proc_dentry1);
+		proc_pid_unhash(leader, proc_dentry2);
 		write_lock_irq(&tasklist_lock);
 
 		if (leader->tgid != current->tgid)
@@ -781,38 +794,52 @@ static inline int de_thread(struct task_
 		proc_pid_flush(proc_dentry1);
 		proc_pid_flush(proc_dentry2);
 
-		release_task(leader);
         }
 
+	/*
+	 * Now there are really no other threads at all,
+	 * so it's safe to stop telling them to kill themselves.
+	 */
+	sig->group_exit = 0;
+
 no_thread_group:
+	exit_itimers(sig);
+	if (leader)
+		release_task(leader);
 
-	write_lock_irq(&tasklist_lock);
-	spin_lock(&oldsighand->siglock);
-	spin_lock(&newsighand->siglock);
-
-	if (current == oldsig->curr_target)
-		oldsig->curr_target = next_thread(current);
-	if (newsig)
-		current->signal = newsig;
-	current->sighand = newsighand;
-	init_sigpending(&current->pending);
-	recalc_sigpending();
-
-	spin_unlock(&newsighand->siglock);
-	spin_unlock(&oldsighand->siglock);
-	write_unlock_irq(&tasklist_lock);
-
-	if (newsig && atomic_dec_and_test(&oldsig->count)) {
-		exit_itimers(oldsig);
-		exit_thread_group_keys(oldsig);
-		kmem_cache_free(signal_cachep, oldsig);
-	}
+	BUG_ON(atomic_read(&sig->count) != 1);
 
-	if (atomic_dec_and_test(&oldsighand->count))
-		kmem_cache_free(sighand_cachep, oldsighand);
+	if (atomic_read(&oldsighand->count) == 1) {
+		/*
+		 * Now that we nuked the rest of the thread group,
+		 * it turns out we are not sharing sighand any more either.
+		 * So we can just keep it.
+		 */
+		kmem_cache_free(sighand_cachep, newsighand);
+	} else {
+		/*
+		 * Move our state over to newsighand and switch it in.
+		 */
+		spin_lock_init(&newsighand->siglock);
+		atomic_set(&newsighand->count, 1);
+		memcpy(newsighand->action, oldsighand->action,
+				sizeof(newsighand->action));
+
+		write_lock_irq(&tasklist_lock);
+		spin_lock(&oldsighand->siglock);
+		spin_lock(&newsighand->siglock);
+
+		current->sighand = newsighand;
+		recalc_sigpending();
+
+		spin_unlock(&newsighand->siglock);
+		spin_unlock(&oldsighand->siglock);
+		write_unlock_irq(&tasklist_lock);
+
+		if (atomic_dec_and_test(&oldsighand->count))
+			kmem_cache_free(sighand_cachep, oldsighand);
+	}
 
-	if (!thread_group_empty(current))
-		BUG();
 	if (current->tgid != current->pid)
 		BUG();
 	return 0;
@@ -891,12 +918,10 @@ int flush_old_exec(struct linux_binprm *
 	/*
 	 * Release all of the old mmap stuff
 	 */
-	retval = exec_mmap(bprm->mm);
+	retval = exec_mmap(bprm);
 	if (retval)
 		goto mmap_failed;
 
-	bprm->mm = NULL;		/* We're using it now */
-
 	/* This is the point of no return */
 	steal_locks(files);
 	put_files_struct(files);
@@ -943,11 +968,12 @@ void setup_new_exec(struct linux_binprm 
 		suid_keys(current);
  		current->mm->dumpable = suid_dumpable;
 		current->pdeath_signal = 0;
-	} else if (permission(bprm->file->f_dentry->d_inode, MAY_READ, NULL) ||
+	} else if (permission(bprm->file->f_dentry->d_inode, MAY_READ, NULL, NULL) ||
 			(bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
 		suid_keys(current);
 		current->mm->dumpable = suid_dumpable;
 	}
+	current->mm->vps_dumpable = 1;
 
 	/* An exec changes our domain. We are no longer part of the thread
 	   group */
@@ -979,10 +1005,24 @@ int prepare_binprm(struct linux_binprm *
 	bprm->e_gid = current->egid;
 
 	if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
+		if (!bprm->perm.set) {
+			/*
+			 * This piece of code creates a time window between
+			 * MAY_EXEC permission check and setuid/setgid
+			 * operations and may be considered as a security hole.
+			 * This code is here for compatibility reasons,
+			 * if the filesystem is unable to return info now.
+			 */
+			bprm->perm.mode = inode->i_mode;
+			bprm->perm.uid = inode->i_uid;
+			bprm->perm.gid = inode->i_gid;
+		}
+		mode = bprm->perm.mode;
+
 		/* Set-uid? */
 		if (mode & S_ISUID) {
 			current->personality &= ~PER_CLEAR_ON_SETID;
-			bprm->e_uid = inode->i_uid;
+			bprm->e_uid = bprm->perm.uid;
 		}
 
 		/* Set-gid? */
@@ -993,7 +1033,7 @@ int prepare_binprm(struct linux_binprm *
 		 */
 		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
 			current->personality &= ~PER_CLEAR_ON_SETID;
-			bprm->e_gid = inode->i_gid;
+			bprm->e_gid = bprm->perm.gid;
 		}
 	}
 
@@ -1093,7 +1133,7 @@ int search_binary_handler(struct linux_b
 
 	        loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
 
-		file = open_exec("/sbin/loader");
+		file = open_exec("/sbin/loader", bprm);
 		retval = PTR_ERR(file);
 		if (IS_ERR(file))
 			return retval;
@@ -1185,20 +1225,24 @@ int do_execve(char * filename,
 	int retval;
 	int i;
 
-	file = open_exec(filename);
-
-	retval = PTR_ERR(file);
-	if (IS_ERR(file))
+	retval = virtinfo_gencall(VIRTINFO_DOEXECVE, NULL);
+	if (retval)
 		return retval;
 
-	sched_exec();
-
 	retval = -ENOMEM;
 	bprm = kmalloc(sizeof(*bprm), GFP_KERNEL);
 	if (!bprm)
 		goto out_ret;
 	memset(bprm, 0, sizeof(*bprm));
 
+	file = open_exec(filename, bprm);
+
+	retval = PTR_ERR(file);
+	if (IS_ERR(file))
+		goto out_open;
+
+	sched_exec();
+
 	bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
 
 	bprm->file = file;
@@ -1271,6 +1315,7 @@ out_file:
 		allow_write_access(bprm->file);
 		fput(bprm->file);
 	}
+out_open:
 	kfree(bprm);
 
 out_ret:
@@ -1328,7 +1373,7 @@ static void format_corename(char *corena
 			case 'p':
 				pid_in_pattern = 1;
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", current->tgid);
+					      "%d", virt_tgid(current));
 				if (rc > out_end - out_ptr)
 					goto out;
 				out_ptr += rc;
@@ -1372,7 +1417,7 @@ static void format_corename(char *corena
 			case 'h':
 				down_read(&uts_sem);
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%s", system_utsname.nodename);
+					      "%s", ve_utsname.nodename);
 				up_read(&uts_sem);
 				if (rc > out_end - out_ptr)
 					goto out;
@@ -1400,7 +1445,7 @@ static void format_corename(char *corena
 	if (!pid_in_pattern
             && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
 		rc = snprintf(out_ptr, out_end - out_ptr,
-			      ".%d", current->tgid);
+			      ".%d", virt_tgid(current));
 		if (rc > out_end - out_ptr)
 			goto out;
 		out_ptr += rc;
@@ -1426,7 +1471,7 @@ static void zap_threads (struct mm_struc
 	}
 
 	read_lock(&tasklist_lock);
-	do_each_thread(g,p)
+	do_each_thread_ve(g,p)
 		if (mm == p->mm && p != tsk) {
 			force_sig_specific(SIGKILL, p);
 			mm->core_waiters++;
@@ -1434,7 +1479,7 @@ static void zap_threads (struct mm_struc
 			    unlikely(p->parent->mm == mm))
 				traced = 1;
 		}
-	while_each_thread(g,p);
+	while_each_thread_ve(g,p);
 
 	read_unlock(&tasklist_lock);
 
@@ -1446,12 +1491,12 @@ static void zap_threads (struct mm_struc
 		 * coredump to finish.  Detach them so they can both die.
 		 */
 		write_lock_irq(&tasklist_lock);
-		do_each_thread(g,p) {
+		do_each_thread_ve(g,p) {
 			if (mm == p->mm && p != tsk &&
 			    p->ptrace && p->parent->mm == mm) {
 				__ptrace_detach(p,0);
 			}
-		} while_each_thread(g,p);
+		} while_each_thread_ve(g,p);
 		write_unlock_irq(&tasklist_lock);
 	}
 }
@@ -1492,7 +1537,7 @@ int do_coredump(long signr, int exit_cod
 	if (current->tux_exit)
 		current->tux_exit();
 	down_write(&mm->mmap_sem);
-	if (!mm->dumpable) {
+	if (!mm->dumpable || mm->vps_dumpable != 1) {
 		up_write(&mm->mmap_sem);
 		goto fail;
 	}
diff -Nurap linux-2.6.9-100.orig/fs/ext2/acl.c linux-2.6.9-ve023stab054/fs/ext2/acl.c
--- linux-2.6.9-100.orig/fs/ext2/acl.c	2011-06-09 19:22:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext2/acl.c	2011-06-15 19:26:19.000000000 +0400
@@ -286,7 +286,7 @@ ext2_set_acl(struct inode *inode, int ty
  * inode->i_sem: don't care
  */
 int
-ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
+__ext2_permission(struct inode *inode, int mask)
 {
 	int mode = inode->i_mode;
 
@@ -338,6 +338,29 @@ check_capabilities:
 	return -EACCES;
 }
 
+int
+ext2_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *exec_perm)
+{
+	int ret;
+
+	if (exec_perm != NULL)
+		down(&inode->i_sem);
+
+	ret = __ext2_permission(inode, mask);
+
+	if (exec_perm != NULL) {
+		if (!ret) {
+			exec_perm->set = 1;
+			exec_perm->mode = inode->i_mode;
+			exec_perm->uid = inode->i_uid;
+			exec_perm->gid = inode->i_gid;
+		}
+		up(&inode->i_sem);
+	}
+	return ret;
+}
+
 /*
  * Initialize the ACLs of a new inode. Called from ext2_new_inode.
  *
diff -Nurap linux-2.6.9-100.orig/fs/ext2/acl.h linux-2.6.9-ve023stab054/fs/ext2/acl.h
--- linux-2.6.9-100.orig/fs/ext2/acl.h	2011-06-09 19:22:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext2/acl.h	2011-06-15 19:26:19.000000000 +0400
@@ -59,7 +59,8 @@ static inline int ext2_acl_count(size_t 
 #define EXT2_ACL_NOT_CACHED ((void *)-1)
 
 /* acl.c */
-extern int ext2_permission (struct inode *, int, struct nameidata *);
+extern int ext2_permission (struct inode *, int, struct nameidata *,
+		struct exec_perm *);
 extern int ext2_acl_chmod (struct inode *);
 extern int ext2_init_acl (struct inode *, struct inode *);
 
@@ -77,7 +78,6 @@ ext2_acl_chmod (struct inode *inode)
 
 static inline int ext2_init_acl (struct inode *inode, struct inode *dir)
 {
-	inode->i_mode &= ~current->fs->umask;
 	return 0;
 }
 #endif
diff -Nurap linux-2.6.9-100.orig/fs/ext2/ialloc.c linux-2.6.9-ve023stab054/fs/ext2/ialloc.c
--- linux-2.6.9-100.orig/fs/ext2/ialloc.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext2/ialloc.c	2011-06-15 19:26:19.000000000 +0400
@@ -609,12 +609,13 @@ got:
 
 	if (DQUOT_ALLOC_INODE(inode)) {
 		DQUOT_DROP(inode);
-		err = -ENOSPC;
+		err = -EDQUOT;
 		goto fail2;
 	}
 	err = ext2_init_acl(inode, dir);
 	if (err) {
 		DQUOT_FREE_INODE(inode);
+		DQUOT_DROP(inode);
 		goto fail2;
 	}
 	mark_inode_dirty(inode);
diff -Nurap linux-2.6.9-100.orig/fs/ext2/namei.c linux-2.6.9-ve023stab054/fs/ext2/namei.c
--- linux-2.6.9-100.orig/fs/ext2/namei.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext2/namei.c	2011-06-15 19:26:20.000000000 +0400
@@ -30,6 +30,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/quotaops.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -282,6 +283,8 @@ static int ext2_unlink(struct inode * di
 	struct page * page;
 	int err = -ENOENT;
 
+	DQUOT_INIT(inode);
+
 	de = ext2_find_entry (dir, dentry, &page);
 	if (!de)
 		goto out;
@@ -324,6 +327,9 @@ static int ext2_rename (struct inode * o
 	struct ext2_dir_entry_2 * old_de;
 	int err = -ENOENT;
 
+	if (new_inode)
+		DQUOT_INIT(new_inode);
+
 	old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
 	if (!old_de)
 		goto out;
diff -Nurap linux-2.6.9-100.orig/fs/ext2/super.c linux-2.6.9-ve023stab054/fs/ext2/super.c
--- linux-2.6.9-100.orig/fs/ext2/super.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext2/super.c	2011-06-15 19:26:19.000000000 +0400
@@ -332,7 +332,6 @@ static int parse_options (char * options
 {
 	char * p;
 	substring_t args[MAX_OPT_ARGS];
-	unsigned long kind = EXT2_MOUNT_ERRORS_CONT;
 	int option;
 
 	if (!options)
@@ -372,13 +371,19 @@ static int parse_options (char * options
 			/* *sb_block = match_int(&args[0]); */
 			break;
 		case Opt_err_panic:
-			kind = EXT2_MOUNT_ERRORS_PANIC;
+			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt (sbi->s_mount_opt, ERRORS_RO);
+			set_opt (sbi->s_mount_opt, ERRORS_PANIC);
 			break;
 		case Opt_err_ro:
-			kind = EXT2_MOUNT_ERRORS_RO;
+			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt (sbi->s_mount_opt, ERRORS_RO);
 			break;
 		case Opt_err_cont:
-			kind = EXT2_MOUNT_ERRORS_CONT;
+			clear_opt (sbi->s_mount_opt, ERRORS_RO);
+			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt (sbi->s_mount_opt, ERRORS_CONT);
 			break;
 		case Opt_nouid32:
 			set_opt (sbi->s_mount_opt, NO_UID32);
@@ -437,7 +442,6 @@ static int parse_options (char * options
 			return 0;
 		}
 	}
-	sbi->s_mount_opt |= kind;
 	return 1;
 }
 
@@ -664,6 +668,8 @@ static int ext2_fill_super(struct super_
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
 	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO)
 		set_opt(sbi->s_mount_opt, ERRORS_RO);
+	else
+		set_opt(sbi->s_mount_opt, ERRORS_CONT);
 
 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -1061,7 +1067,7 @@ static struct file_system_type ext2_fs_t
 	.name		= "ext2",
 	.get_sb		= ext2_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_VIRTUALIZED,
 };
 
 static int __init init_ext2_fs(void)
diff -Nurap linux-2.6.9-100.orig/fs/ext2/xattr_user.c linux-2.6.9-ve023stab054/fs/ext2/xattr_user.c
--- linux-2.6.9-100.orig/fs/ext2/xattr_user.c	2011-06-09 19:22:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext2/xattr_user.c	2011-06-15 19:26:19.000000000 +0400
@@ -41,7 +41,7 @@ ext2_xattr_user_get(struct inode *inode,
 		return -EINVAL;
 	if (!test_opt(inode->i_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	error = permission(inode, MAY_READ, NULL);
+	error = permission(inode, MAY_READ, NULL, NULL);
 	if (error)
 		return error;
 
@@ -61,7 +61,7 @@ ext2_xattr_user_set(struct inode *inode,
 	if ( !S_ISREG(inode->i_mode) &&
 	    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
 		return -EPERM;
-	error = permission(inode, MAY_WRITE, NULL);
+	error = permission(inode, MAY_WRITE, NULL, NULL);
 	if (error)
 		return error;
 
diff -Nurap linux-2.6.9-100.orig/fs/ext3/acl.c linux-2.6.9-ve023stab054/fs/ext3/acl.c
--- linux-2.6.9-100.orig/fs/ext3/acl.c	2011-06-09 19:22:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext3/acl.c	2011-06-15 19:26:19.000000000 +0400
@@ -289,7 +289,7 @@ ext3_set_acl(handle_t *handle, struct in
  * inode->i_sem: don't care
  */
 int
-ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
+__ext3_permission(struct inode *inode, int mask)
 {
 	int mode = inode->i_mode;
 
@@ -341,6 +341,29 @@ check_capabilities:
 	return -EACCES;
 }
 
+int
+ext3_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *exec_perm)
+{
+	int ret;
+
+	if (exec_perm != NULL)
+		down(&inode->i_sem);
+
+	ret = __ext3_permission(inode, mask);
+
+	if (exec_perm != NULL) {
+		if (!ret) {
+			exec_perm->set = 1;
+			exec_perm->mode = inode->i_mode;
+			exec_perm->uid = inode->i_uid;
+			exec_perm->gid = inode->i_gid;
+		}
+		up(&inode->i_sem);
+	}
+	return ret;
+}
+
 /*
  * Initialize the ACLs of a new inode. Called from ext3_new_inode.
  *
diff -Nurap linux-2.6.9-100.orig/fs/ext3/acl.h linux-2.6.9-ve023stab054/fs/ext3/acl.h
--- linux-2.6.9-100.orig/fs/ext3/acl.h	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext3/acl.h	2011-06-15 19:26:19.000000000 +0400
@@ -58,7 +58,8 @@ static inline int ext3_acl_count(size_t 
 #define EXT3_ACL_NOT_CACHED ((void *)-1)
 
 /* acl.c */
-extern int ext3_permission (struct inode *, int, struct nameidata *);
+extern int ext3_permission (struct inode *, int, struct nameidata *,
+		struct exec_perm *);
 extern int ext3_acl_chmod (struct inode *);
 extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
 
@@ -78,7 +79,6 @@ ext3_acl_chmod(struct inode *inode)
 static inline int
 ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
 {
-	inode->i_mode &= ~current->fs->umask;
 	return 0;
 }
 #endif  /* CONFIG_EXT3_FS_POSIX_ACL */
diff -Nurap linux-2.6.9-100.orig/fs/ext3/ialloc.c linux-2.6.9-ve023stab054/fs/ext3/ialloc.c
--- linux-2.6.9-100.orig/fs/ext3/ialloc.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext3/ialloc.c	2011-06-15 19:26:19.000000000 +0400
@@ -609,12 +609,14 @@ got:
 	err = ext3_init_acl(handle, inode, dir);
 	if (err) {
 		DQUOT_FREE_INODE(inode);
+		DQUOT_DROP(inode);
 		goto fail2;
   	}
 	err = ext3_mark_inode_dirty(handle, inode);
 	if (err) {
 		ext3_std_error(sb, err);
 		DQUOT_FREE_INODE(inode);
+		DQUOT_DROP(inode);
 		goto fail2;
 	}
 
diff -Nurap linux-2.6.9-100.orig/fs/ext3/inode.c linux-2.6.9-ve023stab054/fs/ext3/inode.c
--- linux-2.6.9-100.orig/fs/ext3/inode.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext3/inode.c	2011-06-15 19:26:22.000000000 +0400
@@ -956,6 +956,14 @@ static int walk_page_buffers(	handle_t *
  * and the commit_write().  So doing the journal_start at the start of
  * prepare_write() is the right place.
  *
+ * [2004/09/04 SAW] journal_start() in prepare_write() causes different ranking
+ * violations if copy_from_user() triggers a page fault (mmap_sem, may be page
+ * lock, plus __GFP_FS allocations).
+ * Now we read in not up-to-date buffers in prepare_write(), and do the rest
+ * including hole instantiation and inode extension in commit_write().
+ *
+ * Other notes.
+ *
  * Also, this function can nest inside ext3_writepage() ->
  * block_write_full_page(). In that case, we *know* that ext3_writepage()
  * has generated enough buffer credits to do the whole page.  So we won't
@@ -975,6 +983,66 @@ static int walk_page_buffers(	handle_t *
  * write.  
  */
 
+static int ext3_get_block_delay(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh, int create)
+{
+	int ret;
+
+	ret = ext3_get_block_handle(NULL, inode, iblock, bh, 0, 0);
+	if (ret)
+		return ret;
+	if (!buffer_mapped(bh))
+		set_buffer_new(bh);
+	return ret;
+}
+
+/*
+ * Here we make sure that the set of buffers written at once either is fully
+ * mapped at the start, or is fully not mapped and will be allocated at
+ * commit_write time.  --SAW
+ */
+static int ext3_check_block_mapping(struct page *page,
+		unsigned from, unsigned to)
+{
+	struct buffer_head *bh, *head, *next;
+	unsigned block_start, block_end;
+	unsigned blocksize;
+	int mapping, c;
+
+	head = page_buffers(page);
+	blocksize = head->b_size;
+	if (blocksize == PAGE_SIZE)
+		return 0;
+
+	mapping = -1;
+	for (	bh = head, block_start = 0;
+		bh != head || !block_start;
+	    	block_start = block_end, bh = next)
+	{
+		next = bh->b_this_page;
+		block_end = block_start + blocksize;
+		if (block_end <= from || block_start >= to)
+			continue;
+		c = !!buffer_mapped(bh);
+		if (mapping < 0)
+			mapping = c;
+		else if (mapping != c)
+			return block_start - from;
+	}
+	return 0;
+}
+
+static int ext3_prepare_write(struct file *file, struct page *page,
+		unsigned from, unsigned to)
+{
+	int ret;
+
+	ret = block_prepare_write(page, from, to, ext3_get_block_delay);
+	if (ret)
+		return ret;
+	return ext3_check_block_mapping(page, from, to);
+}
+
 static int do_journal_get_write_access(handle_t *handle, 
 				       struct buffer_head *bh)
 {
@@ -983,34 +1051,227 @@ static int do_journal_get_write_access(h
 	return ext3_journal_get_write_access(handle, bh);
 }
 
-static int ext3_prepare_write(struct file *file, struct page *page,
-			      unsigned from, unsigned to)
+/*
+ * This function makes sure that a single prepare_write-commit_write loop
+ * processes either a set of page buffers that are already allocated in full,
+ * or completely not allocated.
+ */
+static int ext3_check_write(struct inode *inode, struct page *page,
+			    unsigned from, unsigned to)
+{
+	struct buffer_head *bh, *head, *next;
+	unsigned block_start, block_end;
+	unsigned blocksize, bbits;
+	sector_t block;
+
+	head = page_buffers(page);
+	bbits = inode->i_blkbits;
+	blocksize = 1 << bbits;
+	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	for (	bh = head, block_start = 0;
+		bh != head || !block_start;
+	    	block++, block_start = block_end, bh = next)
+	{
+		next = bh->b_this_page;
+		block_end = block_start + blocksize;
+		if (block_end <= from || block_start >= to)
+			continue;
+		if (!buffer_mapped(bh))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Block allocation loop.
+ * This function repeats the second part of block_prepare_write
+ * (the first part being responsible for reading in block parts that are not
+ * up-to-date).
+ * On error, the content should not be zeroed!
+ */
+static int ext3_do_map_write(struct inode *inode, struct page *page,
+			     unsigned from, unsigned to)
+{
+	struct buffer_head *bh, *head, *next;
+	unsigned block_start, block_end;
+	unsigned blocksize, bbits;
+	sector_t block;
+	int err;
+
+	head = page_buffers(page);
+	bbits = inode->i_blkbits;
+	blocksize = 1 << bbits;
+	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	for (	bh = head, block_start = 0;
+		bh != head || !block_start;
+	    	block++, block_start = block_end, bh = next)
+	{
+		next = bh->b_this_page;
+		block_end = block_start + blocksize;
+		if (block_end <= from || block_start >= to)
+			continue;
+		if (buffer_mapped(bh))
+			continue;
+		err = ext3_get_block(inode, block, bh, 1);
+		if (err)
+			return err;
+		/* buffer must be new and mapped here */
+		clear_buffer_new(bh);
+		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+	}
+	return 0;
+}
+
+/*
+ * The idea of this helper function is following:
+ * if prepare_write has allocated some blocks, but not all of them, the
+ * transaction must include the content of the newly allocated blocks.
+ * 2006/10/14  SAW
+ */
+static int ext3_prepare_failure(struct file *file, struct page *page,
+				unsigned from, unsigned to)
+{
+	struct address_space *mapping;
+	struct buffer_head *bh, *head, *next;
+	unsigned block_start, block_end;
+	unsigned blocksize;
+	int ret;
+	handle_t *handle = journal_current_handle();
+
+	mapping = page->mapping;
+	if (ext3_should_writeback_data(mapping->host)) {
+		/* optimization: no constraints about data */
+skip:
+		return 0;
+	}
+
+	head = page_buffers(page);
+	blocksize = head->b_size;
+	for (	bh = head, block_start = 0;
+		bh != head || !block_start;
+	    	block_start = block_end, bh = next)
+	{
+		next = bh->b_this_page;
+		block_end = block_start + blocksize;
+		if (block_end <= from)
+			continue;
+		if (block_start >= to) {
+			block_start = to;
+			break;
+		}
+		if (!buffer_mapped(bh))
+		/* prepare_write failed on this bh */
+			break;
+		if (ext3_should_journal_data(mapping->host)) {
+			ret = do_journal_get_write_access(handle, bh);
+			if (ret)
+				return ret;
+		}
+	/*
+	 * block_start here becomes the first block where the current iteration
+	 * of prepare_write failed.
+	 */
+	}
+	if (block_start <= from)
+		goto skip;
+
+	/* commit allocated buffers */
+	ret = mapping->a_ops->commit_write(file, page, from, block_start);
+	if (ret < 0)
+		return ret;
+	return block_start - from;
+}
+
+/*
+ * This function zeroes buffers not mapped to disk.
+ * The purpose of it is the same as of error recovery in
+ * __block_prepare_write(), to avoid keeping garbage in the page cache.
+ * The code is repeated here since on-disk space is now allocated in
+ * commit_write, not in prepare_write.
+ *
+ * This function is called only for disk blocks where allocation failed,
+ * so they can be cleared without fear.
+ */
+static void ext3_rollback_write(struct page *page,
+		unsigned from, unsigned to)
+{
+	struct buffer_head *bh, *head, *next;
+	unsigned block_start, block_end;
+	unsigned blocksize;
+	void *kaddr;
+
+	kaddr = kmap_atomic(page, KM_USER0);
+	memset(kaddr + from, 0, to - from);
+	flush_dcache_page(page);
+	kunmap_atomic(kaddr, KM_USER0);
+
+	head = page_buffers(page);
+	blocksize = head->b_size;
+	for (	bh = head, block_start = 0;
+		bh != head || !block_start;
+	    	block_start = block_end, bh = next)
+	{
+		next = bh->b_this_page;
+		block_end = block_start + blocksize;
+		if (block_end <= from || block_start >= to)
+			continue;
+		set_buffer_uptodate(bh);
+	}
+}
+
+static int ext3_map_write(struct file *file, struct page *page,
+			  unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
-	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
+	int ret, ret2, ret3;
+	int needed_blocks = ext3_writepage_trans_blocks(inode);
 	handle_t *handle;
 	int retries = 0;
+	int need_alloc;
 
+		/* This may be recursive call:  
+		 *  commit_write                
+		 *    ->ext3_map_write          (1)
+		 *       ->ext3_prepare_failure
+		 *          ->commit_write
+		 *             ->ext3_map_write (2) 
+		 *
+		 * But it is not a problem because we don't alloc any blocks 
+		 * during stage(2). 
+		 * Result: only two recursive calls are posible. 
+		 */
+	need_alloc = ext3_check_write(inode, page, from, to);
 retry:
 	handle = ext3_journal_start(inode, needed_blocks);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	ret = 0;
+	if (need_alloc) {
+		ret = ext3_do_map_write(inode, page, from, to);
+		if (ret)
+			goto failure;
 	}
-	ret = block_prepare_write(page, from, to, ext3_get_block);
-	if (ret)
-		goto prepare_write_failed;
-
 	if (ext3_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
+		if (ret)
+			/* fatal error, just put the handle and return */
+			journal_stop(handle);
 	}
-prepare_write_failed:
-	if (ret)
-		ext3_journal_stop(handle);
+	return ret;
+
+failure:
+	ret2 = ext3_prepare_failure(file, page, from, to);
+	ret3 = ext3_journal_stop(handle);
+	if (ret2)
+		/* fatal error, or some data has been written successfully */
+		return ret2;
+	if (ret3)
+		return ret3;
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
-out:
+	/* retry number exceeded, or other error like -EDQUOT */
+	ext3_rollback_write(page, from, to);
 	return ret;
 }
 
@@ -1044,9 +1305,14 @@ static int commit_write_fn(handle_t *han
 static int ext3_ordered_commit_write(struct file *file, struct page *page,
 			     unsigned from, unsigned to)
 {
-	handle_t *handle = ext3_journal_current_handle();
+	handle_t *handle;
 	struct inode *inode = page->mapping->host;
-	int ret = 0, ret2;
+	int ret, ret2;
+
+	ret = ext3_map_write(file, page, from, to);
+	if (ret)
+		return ret;
+	handle = ext3_journal_current_handle();
 
 	ret = walk_page_buffers(handle, page_buffers(page),
 		from, to, NULL, ext3_journal_dirty_data);
@@ -1073,11 +1339,16 @@ static int ext3_ordered_commit_write(str
 static int ext3_writeback_commit_write(struct file *file, struct page *page,
 			     unsigned from, unsigned to)
 {
-	handle_t *handle = ext3_journal_current_handle();
+	handle_t *handle;
 	struct inode *inode = page->mapping->host;
-	int ret = 0, ret2;
+	int ret, ret2;
 	loff_t new_i_size;
 
+	ret = ext3_map_write(file, page, from, to);
+	if (ret)
+		return ret;
+	handle = ext3_journal_current_handle();
+
 	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 	if (new_i_size > EXT3_I(inode)->i_disksize)
 		EXT3_I(inode)->i_disksize = new_i_size;
@@ -1091,12 +1362,17 @@ static int ext3_writeback_commit_write(s
 static int ext3_journalled_commit_write(struct file *file,
 			struct page *page, unsigned from, unsigned to)
 {
-	handle_t *handle = ext3_journal_current_handle();
+	handle_t *handle;
 	struct inode *inode = page->mapping->host;
-	int ret = 0, ret2;
+	int ret, ret2;
 	int partial = 0;
 	loff_t pos;
 
+	ret = ext3_map_write(file, page, from, to);
+	if (ret)
+		return ret;
+	handle = ext3_journal_current_handle();
+
 	/*
 	 * Here we duplicate the generic_commit_write() functionality
 	 */
@@ -1364,8 +1640,11 @@ static int ext3_journalled_writepage(str
 		ClearPageChecked(page);
 		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
 					ext3_get_block);
-		if (ret != 0)
-			goto out_unlock;
+		if (ret != 0) {
+			ext3_journal_stop(handle);
+			unlock_page(page);
+			return ret;
+		}
 		ret = walk_page_buffers(handle, page_buffers(page), 0,
 			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
 
@@ -1391,7 +1670,6 @@ out:
 
 no_write:
 	redirty_page_for_writepage(wbc, page);
-out_unlock:
 	unlock_page(page);
 	goto out;
 }
@@ -2691,7 +2969,7 @@ out_brelse:
  */
 int ext3_write_inode(struct inode *inode, int wait)
 {
-	if (current->flags & PF_MEMALLOC)
+	if ((current->flags & PF_MEMALLOC) || test_thread_flag(TIF_MEMDIE))
 		return 0;
 
 	if (ext3_journal_current_handle()) {
diff -Nurap linux-2.6.9-100.orig/fs/ext3/ioctl.c linux-2.6.9-ve023stab054/fs/ext3/ioctl.c
--- linux-2.6.9-100.orig/fs/ext3/ioctl.c	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext3/ioctl.c	2011-06-15 19:26:19.000000000 +0400
@@ -68,7 +68,7 @@ int ext3_ioctl (struct inode * inode, st
 		 * the relevant capability.
 		 */
 		if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
-			if (!capable(CAP_SYS_RESOURCE))
+			if (!capable(CAP_SYS_ADMIN))
 				return -EPERM;
 		}
 
diff -Nurap linux-2.6.9-100.orig/fs/ext3/namei.c linux-2.6.9-ve023stab054/fs/ext3/namei.c
--- linux-2.6.9-100.orig/fs/ext3/namei.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext3/namei.c	2011-06-15 19:26:19.000000000 +0400
@@ -972,6 +972,7 @@ static struct buffer_head * ext3_dx_find
 				  (block<<EXT3_BLOCK_SIZE_BITS(sb))
 					  +((char *)de - bh->b_data))) {
 				brelse (bh);
+				*err = ERR_BAD_DX_DIR;
 				goto errout;
 			}
 			*res_dir = de;
@@ -1016,6 +1017,11 @@ static struct dentry *ext3_lookup(struct
 		inode = ext3_iget(dir->i_sb, ino);
 		if (IS_ERR(inode))
 			return ERR_CAST(inode);
+
+		if (is_bad_inode(inode)) {
+			iput(inode);
+			return ERR_PTR(-ENOENT);
+		}
 	}
 	if (inode)
 		return d_splice_alias(inode, dentry);
@@ -1047,6 +1053,11 @@ struct dentry *ext3_get_parent(struct de
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
 
+	if (is_bad_inode(inode)) {
+		iput(inode);
+		return ERR_PTR(-ENOENT);	
+	}
+
 	parent = d_alloc_anon(inode);
 	if (!parent) {
 		iput(inode);
@@ -1140,9 +1151,9 @@ static struct ext3_dir_entry_2 *do_split
 	char *data1 = (*bh)->b_data, *data2;
 	unsigned split, move, size, i;
 	struct ext3_dir_entry_2 *de = NULL, *de2;
-	int	err;
+	int	err = 0;
 
-	bh2 = ext3_append (handle, dir, &newblock, error);
+	bh2 = ext3_append (handle, dir, &newblock, &err);
 	if (!(bh2)) {
 		brelse(*bh);
 		*bh = NULL;
@@ -1151,14 +1162,9 @@ static struct ext3_dir_entry_2 *do_split
 
 	BUFFER_TRACE(*bh, "get_write_access");
 	err = ext3_journal_get_write_access(handle, *bh);
-	if (err) {
-	journal_error:
-		brelse(*bh);
-		brelse(bh2);
-		*bh = NULL;
-		ext3_std_error(dir->i_sb, err);
-		goto errout;
-	}
+	if (err)
+		goto journal_error;
+
 	BUFFER_TRACE(frame->bh, "get_write_access");
 	err = ext3_journal_get_write_access(handle, frame->bh);
 	if (err)
@@ -1212,8 +1218,16 @@ static struct ext3_dir_entry_2 *do_split
 		goto journal_error;
 	brelse (bh2);
 	dxtrace(dx_show_index ("frame", frame->entries));
-errout:
 	return de;
+
+journal_error:
+	brelse(*bh);
+	brelse(bh2);
+	*bh = NULL;
+	ext3_std_error(dir->i_sb, err);
+errout:
+	*error = err;
+	return NULL;
 }
 #endif
 
@@ -1310,7 +1324,7 @@ static int add_dirent_to_buf(handle_t *h
 	if (err)
 		ext3_std_error(dir->i_sb, err);
 	brelse(bh);
-	return 0;
+	return err;
 }
 
 #ifdef CONFIG_EXT3_INDEX
diff -Nurap linux-2.6.9-100.orig/fs/ext3/super.c linux-2.6.9-ve023stab054/fs/ext3/super.c
--- linux-2.6.9-100.orig/fs/ext3/super.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext3/super.c	2011-06-15 19:26:19.000000000 +0400
@@ -149,20 +149,21 @@ static void ext3_handle_error(struct sup
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
-	if (test_opt (sb, ERRORS_PANIC))
-		panic ("EXT3-fs (device %s): panic forced after error\n",
-		       sb->s_id);
-	if (test_opt (sb, ERRORS_RO)) {
-		printk (KERN_CRIT "Remounting filesystem read-only\n");
-		sb->s_flags |= MS_RDONLY;
-	} else {
+	if (!test_opt (sb, ERRORS_CONT)) {
 		journal_t *journal = EXT3_SB(sb)->s_journal;
 
 		EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
 		if (journal)
 			journal_abort(journal, -EIO);
 	}
+	if (test_opt (sb, ERRORS_RO)) {
+		printk (KERN_CRIT "Remounting filesystem read-only\n");
+		sb->s_flags |= MS_RDONLY;
+	}
 	ext3_commit_super(sb, es, 1);
+	if (test_opt (sb, ERRORS_PANIC))
+		panic ("EXT3-fs (device %s): panic forced after error\n",
+				sb->s_id);
 }
 
 void ext3_error (struct super_block * sb, const char * function,
@@ -464,6 +465,21 @@ static struct inode *ext3_alloc_inode(st
 
 static void ext3_destroy_inode(struct inode *inode)
 {
+	if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
+		int i, imax;
+		unsigned int *p;
+
+		p = (unsigned int *)EXT3_I(inode);
+		imax = sizeof(struct ext3_inode_info) / sizeof(unsigned int);
+		printk("Inode %p: orphan list check failed!\n", EXT3_I(inode));
+		for (i = 0; i < imax; i++) {
+			if (i && ((i % 8) == 0))
+				printk("\n");
+			printk("%08x ", *p++);
+		}
+		printk("\n");
+		dump_stack();
+	}
 	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
 }
 
@@ -1368,6 +1384,8 @@ static int ext3_fill_super (struct super
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
 	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO)
 		set_opt(sbi->s_mount_opt, ERRORS_RO);
+	else
+		set_opt(sbi->s_mount_opt, ERRORS_CONT);
 
 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -2457,7 +2475,7 @@ static struct file_system_type ext3_fs_t
 	.name		= "ext3",
 	.get_sb		= ext3_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_VIRTUALIZED,
 };
 
 static int __init init_ext3_fs(void)
diff -Nurap linux-2.6.9-100.orig/fs/ext3/xattr_user.c linux-2.6.9-ve023stab054/fs/ext3/xattr_user.c
--- linux-2.6.9-100.orig/fs/ext3/xattr_user.c	2011-06-09 19:22:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ext3/xattr_user.c	2011-06-15 19:26:19.000000000 +0400
@@ -43,7 +43,7 @@ ext3_xattr_user_get(struct inode *inode,
 		return -EINVAL;
 	if (!test_opt(inode->i_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	error = permission(inode, MAY_READ, NULL);
+	error = permission(inode, MAY_READ, NULL, NULL);
 	if (error)
 		return error;
 
@@ -63,7 +63,7 @@ ext3_xattr_user_set(struct inode *inode,
 	if ( !S_ISREG(inode->i_mode) &&
 	    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
 		return -EPERM;
-	error = permission(inode, MAY_WRITE, NULL);
+	error = permission(inode, MAY_WRITE, NULL, NULL);
 	if (error)
 		return error;
 
diff -Nurap linux-2.6.9-100.orig/fs/fcntl.c linux-2.6.9-ve023stab054/fs/fcntl.c
--- linux-2.6.9-100.orig/fs/fcntl.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/fcntl.c	2011-06-15 19:26:21.000000000 +0400
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
+#include <linux/ve_owner.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -203,6 +204,7 @@ out_fput:
 	fput(file);
 	goto out;
 }
+EXPORT_SYMBOL(sys_dup2);
 
 asmlinkage long sys_dup(unsigned int fildes)
 {
@@ -221,6 +223,9 @@ static int setfl(int fd, struct file * f
 	struct inode * inode = filp->f_dentry->d_inode;
 	int error = 0;
 
+	if (!capable(CAP_SYS_RAWIO))
+		arg &= ~O_DIRECT;
+
 	/* O_APPEND cannot be cleared if the file is marked as append-only */
 	if (!(arg & O_APPEND) && IS_APPEND(inode))
 		return -EPERM;
@@ -264,6 +269,7 @@ static int setfl(int fd, struct file * f
 static void f_modown(struct file *filp, unsigned long pid,
                      uid_t uid, uid_t euid, int force)
 {
+	pid = comb_vpid_to_pid(pid);
 	write_lock_irq(&filp->f_owner.lock);
 	if (force || !filp->f_owner.pid) {
 		filp->f_owner.pid = pid;
@@ -330,7 +336,7 @@ static long do_fcntl(int fd, unsigned in
 		 * current syscall conventions, the only way
 		 * to fix this will be in libc.
 		 */
-		err = filp->f_owner.pid;
+		err = comb_pid_to_vpid(filp->f_owner.pid);
 		force_successful_syscall_return();
 		break;
 	case F_SETOWN:
@@ -481,6 +487,8 @@ static void send_sigio_to_task(struct ta
 
 void send_sigio(struct fown_struct *fown, int fd, int band)
 {
+	struct file *f;
+	struct ve_struct *env;
 	struct task_struct *p;
 	int pid;
 	
@@ -488,17 +496,21 @@ void send_sigio(struct fown_struct *fown
 	pid = fown->pid;
 	if (!pid)
 		goto out_unlock_fown;
-	
+
+	/* hack: fown's are always embedded in struct file */
+	f = container_of(fown, struct file, f_owner);
+	env = VE_OWNER_FILP(f);
+
 	read_lock(&tasklist_lock);
 	if (pid > 0) {
-		p = find_task_by_pid(pid);
-		if (p) {
+		p = find_task_by_pid_all(pid);
+		if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, env)) {
 			send_sigio_to_task(p, fown, fd, band);
 		}
 	} else {
-		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
+		__do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, env) {
 			send_sigio_to_task(p, fown, fd, band);
-		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
+		} __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, env);
 	}
 	read_unlock(&tasklist_lock);
  out_unlock_fown:
@@ -514,6 +526,8 @@ static void send_sigurg_to_task(struct t
 
 int send_sigurg(struct fown_struct *fown)
 {
+	struct file *f;
+	struct ve_struct *env;
 	struct task_struct *p;
 	int pid, ret = 0;
 	
@@ -524,16 +538,20 @@ int send_sigurg(struct fown_struct *fown
 
 	ret = 1;
 	
+	/* hack: fown's are always embedded in struct file */
+	f = container_of(fown, struct file, f_owner);
+	env = VE_OWNER_FILP(f);
+
 	read_lock(&tasklist_lock);
 	if (pid > 0) {
-		p = find_task_by_pid(pid);
-		if (p) {
+		p = find_task_by_pid_all(pid);
+		if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, env)) {
 			send_sigurg_to_task(p, fown);
 		}
 	} else {
-		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
+		__do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, env) {
 			send_sigurg_to_task(p, fown);
-		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
+		} __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, env);
 	}
 	read_unlock(&tasklist_lock);
  out_unlock_fown:
diff -Nurap linux-2.6.9-100.orig/fs/file.c linux-2.6.9-ve023stab054/fs/file.c
--- linux-2.6.9-100.orig/fs/file.c	2004-10-19 01:53:13.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/file.c	2011-06-15 19:26:21.000000000 +0400
@@ -12,9 +12,11 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/file.h>
+#include <linux/module.h>
 
 #include <asm/bitops.h>
 
+#include <ub/ub_mem.h>
 
 /*
  * Allocate an fd array, using kmalloc or vmalloc.
@@ -26,9 +28,9 @@ struct file ** alloc_fd_array(int num)
 	int size = num * sizeof(struct file *);
 
 	if (size <= PAGE_SIZE)
-		new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
+		new_fds = (struct file **) ub_kmalloc(size, GFP_KERNEL);
 	else 
-		new_fds = (struct file **) vmalloc(size);
+		new_fds = (struct file **) ub_vmalloc(size);
 	return new_fds;
 }
 
@@ -124,6 +126,7 @@ int expand_fd_array(struct files_struct 
 out:
 	return error;
 }
+EXPORT_SYMBOL(expand_fd_array);
 
 /*
  * Allocate an fdset array, using kmalloc or vmalloc.
@@ -135,9 +138,9 @@ fd_set * alloc_fdset(int num)
 	int size = num / 8;
 
 	if (size <= PAGE_SIZE)
-		new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
+		new_fdset = (fd_set *) ub_kmalloc(size, GFP_KERNEL);
 	else
-		new_fdset = (fd_set *) vmalloc(size);
+		new_fdset = (fd_set *) ub_vmalloc(size);
 	return new_fdset;
 }
 
@@ -225,4 +228,4 @@ out:
 	spin_lock(&files->file_lock);
 	return error;
 }
-
+EXPORT_SYMBOL(expand_fdset);
diff -Nurap linux-2.6.9-100.orig/fs/file_table.c linux-2.6.9-ve023stab054/fs/file_table.c
--- linux-2.6.9-100.orig/fs/file_table.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/file_table.c	2011-06-15 19:26:19.000000000 +0400
@@ -8,6 +8,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/ve_owner.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/smp_lock.h>
@@ -17,6 +18,8 @@
 #include <linux/mount.h>
 #include <linux/cdev.h>
 
+#include <ub/ub_misc.h>
+
 /* sysctl tunables... */
 struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
@@ -54,6 +57,8 @@ void filp_dtor(void * objp, struct kmem_
 
 static inline void file_free(struct file *f)
 {
+	ub_file_uncharge(f);
+	put_ve(VE_OWNER_FILP(f));
 	kmem_cache_free(filp_cachep, f);
 }
 
@@ -63,41 +68,56 @@ static inline void file_free(struct file
  */
 struct file *get_empty_filp(void)
 {
-static int old_max;
+	static int old_max;
 	struct file * f;
 
+#if 0
+	/*
+	 * nr_files limit is broken until some recent mainstream kernels.
+	 * someone very clever decided to do inc/dec from slab
+	 * constructor and destructor.
+	 * just remove this check at all, as we have UBC numfile limit.
+	 */
 	/*
 	 * Privileged users can go above max_files
 	 */
-	if (files_stat.nr_files < files_stat.max_files ||
-				capable(CAP_SYS_ADMIN)) {
-		f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
-		if (f) {
-			memset(f, 0, sizeof(*f));
-			if (security_file_alloc(f)) {
-				file_free(f);
-				goto fail;
-			}
-			eventpoll_init_file(f);
-			atomic_set(&f->f_count, 1);
-			f->f_uid = current->fsuid;
-			f->f_gid = current->fsgid;
-			f->f_owner.lock = RW_LOCK_UNLOCKED;
-			/* f->f_version: 0 */
-			INIT_LIST_HEAD(&f->f_list);
-			return f;
-		}
+	if (files_stat.nr_files >= files_stat.max_files &&
+			!capable(CAP_SYS_ADMIN))
+		goto over;
+#endif
+	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
+	if (f == NULL)
+		goto fail;
+
+	memset(f, 0, sizeof(*f));
+	if (ub_file_charge(f)) {
+		kmem_cache_free(filp_cachep, f);
+		goto fail;
 	}
 
+	SET_VE_OWNER_FILP(f, get_ve(get_exec_env()));
+	if (security_file_alloc(f)) {
+		file_free(f);
+		goto fail;
+	}
+	eventpoll_init_file(f);
+	atomic_set(&f->f_count, 1);
+	f->f_uid = current->fsuid;
+	f->f_gid = current->fsgid;
+	f->f_owner.lock = RW_LOCK_UNLOCKED;
+	/* f->f_version: 0 */
+	INIT_LIST_HEAD(&f->f_list);
+	return f;
+
+#if 0
+over:
 	/* Ran out of filps - report that */
-	if (files_stat.max_files >= old_max) {
+	if (files_stat.nr_files > old_max) {
 		printk(KERN_INFO "VFS: file-max limit %d reached\n",
-					files_stat.max_files);
-		old_max = files_stat.max_files;
-	} else {
-		/* Big problems... */
-		printk(KERN_WARNING "VFS: filp allocation failed\n");
+				files_stat.max_files);
+		old_max = files_stat.nr_files;
 	}
+#endif
 fail:
 	return NULL;
 }
diff -Nurap linux-2.6.9-100.orig/fs/filesystems.c linux-2.6.9-ve023stab054/fs/filesystems.c
--- linux-2.6.9-100.orig/fs/filesystems.c	2004-10-19 01:54:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/filesystems.c	2011-06-15 19:26:19.000000000 +0400
@@ -11,6 +11,7 @@
 #include <linux/kmod.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/ve_owner.h>
 #include <asm/uaccess.h>
 
 /*
@@ -20,8 +21,8 @@
  *	During the unload module must call unregister_filesystem().
  *	We can access the fields of list element if:
  *		1) spinlock is held or
- *		2) we hold the reference to the module.
- *	The latter can be guaranteed by call of try_module_get(); if it
+ *		2) we hold the reference to the element.
+ *	The latter can be guaranteed by call of try_filesystem(); if it
  *	returned 0 we must skip the element, otherwise we got the reference.
  *	Once the reference is obtained we can drop the spinlock.
  */
@@ -29,23 +30,51 @@
 static struct file_system_type *file_systems;
 static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
 
+int try_get_filesystem(struct file_system_type *fs)
+{
+	if (try_module_get(fs->owner)) {
+#ifdef CONFIG_VE
+		get_ve(VE_OWNER_FSTYPE(fs));
+#endif
+		return 1;
+	}
+	return 0;
+}
+
 /* WARNING: This can be used only if we _already_ own a reference */
 void get_filesystem(struct file_system_type *fs)
 {
+#ifdef CONFIG_VE
+	get_ve(VE_OWNER_FSTYPE(fs));
+#endif
 	__module_get(fs->owner);
 }
 
 void put_filesystem(struct file_system_type *fs)
 {
 	module_put(fs->owner);
+#ifdef CONFIG_VE
+	put_ve(VE_OWNER_FSTYPE(fs));
+#endif
+}
+
+static inline int check_ve_fstype(struct file_system_type *p,
+		struct ve_struct *env)
+{
+	return ((p->fs_flags & FS_VIRTUALIZED) ||
+			ve_accessible_strict(VE_OWNER_FSTYPE(p), env));
 }
 
-static struct file_system_type **find_filesystem(const char *name)
+static struct file_system_type **find_filesystem(const char *name,
+		struct ve_struct *env)
 {
 	struct file_system_type **p;
-	for (p=&file_systems; *p; p=&(*p)->next)
+	for (p=&file_systems; *p; p=&(*p)->next) {
+		if (!check_ve_fstype(*p, env))
+			continue;
 		if (strcmp((*p)->name,name) == 0)
 			break;
+	}
 	return p;
 }
 
@@ -72,8 +101,10 @@ int register_filesystem(struct file_syst
 	if (fs->next)
 		return -EBUSY;
 	INIT_LIST_HEAD(&fs->fs_supers);
+	if (VE_OWNER_FSTYPE(fs) == NULL)
+		SET_VE_OWNER_FSTYPE(fs, get_ve0());
 	write_lock(&file_systems_lock);
-	p = find_filesystem(fs->name);
+	p = find_filesystem(fs->name, VE_OWNER_FSTYPE(fs));
 	if (*p)
 		res = -EBUSY;
 	else
@@ -130,11 +161,14 @@ static int fs_index(const char __user * 
 
 	err = -EINVAL;
 	read_lock(&file_systems_lock);
-	for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
+	for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next) {
+		if (!check_ve_fstype(tmp, get_exec_env()))
+			continue;
 		if (strcmp(tmp->name,name) == 0) {
 			err = index;
 			break;
 		}
+		index++;
 	}
 	read_unlock(&file_systems_lock);
 	putname(name);
@@ -147,9 +181,15 @@ static int fs_name(unsigned int index, c
 	int len, res;
 
 	read_lock(&file_systems_lock);
-	for (tmp = file_systems; tmp; tmp = tmp->next, index--)
-		if (index <= 0 && try_module_get(tmp->owner))
-			break;
+	for (tmp = file_systems; tmp; tmp = tmp->next) {
+		if (!check_ve_fstype(tmp, get_exec_env()))
+			continue;
+		if (!index) {
+			if (try_get_filesystem(tmp))
+				break;
+		} else
+			index--;
+	}
 	read_unlock(&file_systems_lock);
 	if (!tmp)
 		return -EINVAL;
@@ -167,8 +207,9 @@ static int fs_maxindex(void)
 	int index;
 
 	read_lock(&file_systems_lock);
-	for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
-		;
+	for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next)
+		if (check_ve_fstype(tmp, get_exec_env()))
+			index++;
 	read_unlock(&file_systems_lock);
 	return index;
 }
@@ -204,9 +245,10 @@ int get_filesystem_list(char * buf)
 	read_lock(&file_systems_lock);
 	tmp = file_systems;
 	while (tmp && len < PAGE_SIZE - 80) {
-		len += sprintf(buf+len, "%s\t%s\n",
-			(tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
-			tmp->name);
+		if (check_ve_fstype(tmp, get_exec_env()))
+			len += sprintf(buf+len, "%s\t%s\n",
+				(tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
+				tmp->name);
 		tmp = tmp->next;
 	}
 	read_unlock(&file_systems_lock);
@@ -218,14 +260,14 @@ struct file_system_type *get_fs_type(con
 	struct file_system_type *fs;
 
 	read_lock(&file_systems_lock);
-	fs = *(find_filesystem(name));
-	if (fs && !try_module_get(fs->owner))
+	fs = *(find_filesystem(name, get_exec_env()));
+	if (fs && !try_get_filesystem(fs))
 		fs = NULL;
 	read_unlock(&file_systems_lock);
 	if (!fs && (request_module("%s", name) == 0)) {
 		read_lock(&file_systems_lock);
-		fs = *(find_filesystem(name));
-		if (fs && !try_module_get(fs->owner))
+		fs = *(find_filesystem(name, get_exec_env()));
+		if (fs && !try_get_filesystem(fs))
 			fs = NULL;
 		read_unlock(&file_systems_lock);
 	}
@@ -233,3 +275,5 @@ struct file_system_type *get_fs_type(con
 }
 
 EXPORT_SYMBOL(get_fs_type);
+EXPORT_SYMBOL(get_filesystem);
+EXPORT_SYMBOL(put_filesystem);
diff -Nurap linux-2.6.9-100.orig/fs/fs-writeback.c linux-2.6.9-ve023stab054/fs/fs-writeback.c
--- linux-2.6.9-100.orig/fs/fs-writeback.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/fs-writeback.c	2011-06-15 19:26:18.000000000 +0400
@@ -495,32 +495,6 @@ static void set_sb_syncing(int val)
 	spin_unlock(&sb_lock);
 }
 
-/*
- * Find a superblock with inodes that need to be synced
- */
-static struct super_block *get_super_to_sync(void)
-{
-	struct super_block *sb;
-restart:
-	spin_lock(&sb_lock);
-	sb = sb_entry(super_blocks.prev);
-	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
-		if (sb->s_syncing)
-			continue;
-		sb->s_syncing = 1;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (!sb->s_root) {
-			drop_super(sb);
-			goto restart;
-		}
-		return sb;
-	}
-	spin_unlock(&sb_lock);
-	return NULL;
-}
-
 /**
  * sync_inodes
  *
@@ -539,23 +513,39 @@ restart:
  * outstanding dirty inodes, the writeback goes block-at-a-time within the
  * filesystem's write_inode().  This is extremely slow.
  */
-void sync_inodes(int wait)
+static void __sync_inodes(int wait)
 {
 	struct super_block *sb;
 
-	set_sb_syncing(0);
-	while ((sb = get_super_to_sync()) != NULL) {
-		sync_inodes_sb(sb, 0);
-		sync_blockdev(sb->s_bdev);
-		drop_super(sb);
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {	
+		if (sb->s_syncing)
+			continue;
+		sb->s_syncing = 1;
+		sb->s_count++;
+		spin_unlock(&sb_lock);				
+		down_read(&sb->s_umount); 
+		if (sb->s_root) {
+			sync_inodes_sb(sb, wait);
+			sync_blockdev(sb->s_bdev);
+		}
+		up_read(&sb->s_umount);	
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
 	}
+	spin_unlock(&sb_lock);
+}
+
+void sync_inodes(int wait)
+{
+	set_sb_syncing(0);
+	__sync_inodes(0);
+	
 	if (wait) {
 		set_sb_syncing(0);
-		while ((sb = get_super_to_sync()) != NULL) {
-			sync_inodes_sb(sb, 1);
-			sync_blockdev(sb->s_bdev);
-			drop_super(sb);
-		}
+		__sync_inodes(1);
 	}
 }
 
diff -Nurap linux-2.6.9-100.orig/fs/hfs/inode.c linux-2.6.9-ve023stab054/fs/hfs/inode.c
--- linux-2.6.9-100.orig/fs/hfs/inode.c	2011-06-09 19:23:05.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/hfs/inode.c	2011-06-15 19:26:19.000000000 +0400
@@ -517,11 +517,11 @@ void hfs_clear_inode(struct inode *inode
 }
 
 static int hfs_permission(struct inode *inode, int mask,
-			  struct nameidata *nd)
+			  struct nameidata *nd, struct exec_perm *exec_perm)
 {
 	if (S_ISREG(inode->i_mode) && mask & MAY_EXEC)
 		return 0;
-	return vfs_permission(inode, mask);
+	return vfs_permission(inode, mask, NULL);
 }
 
 static int hfs_file_open(struct inode *inode, struct file *file)
diff -Nurap linux-2.6.9-100.orig/fs/hfsplus/inode.c linux-2.6.9-ve023stab054/fs/hfsplus/inode.c
--- linux-2.6.9-100.orig/fs/hfsplus/inode.c	2004-10-19 01:53:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/hfsplus/inode.c	2011-06-15 19:26:19.000000000 +0400
@@ -252,15 +252,19 @@ static void hfsplus_set_perms(struct ino
 	perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev);
 }
 
-static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int hfsplus_permission(struct inode *inode, int mask,
+		struct nameidata *nd, struct exec_perm *exec_perm)
 {
 	/* MAY_EXEC is also used for lookup, if no x bit is set allow lookup,
 	 * open_exec has the same test, so it's still not executable, if a x bit
 	 * is set fall back to standard permission check.
+	 *
+	 * The comment above and the check below don't make much sense
+	 * with S_ISREG condition...  --SAW
 	 */
 	if (S_ISREG(inode->i_mode) && mask & MAY_EXEC && !(inode->i_mode & 0111))
 		return 0;
-	return vfs_permission(inode, mask);
+	return vfs_permission(inode, mask, exec_perm);
 }
 
 
diff -Nurap linux-2.6.9-100.orig/fs/hpfs/namei.c linux-2.6.9-ve023stab054/fs/hpfs/namei.c
--- linux-2.6.9-100.orig/fs/hpfs/namei.c	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/hpfs/namei.c	2011-06-15 19:26:19.000000000 +0400
@@ -415,7 +415,7 @@ again:
 		d_drop(dentry);
 		spin_lock(&dentry->d_lock);
 		if (atomic_read(&dentry->d_count) > 1 ||
-		    permission(inode, MAY_WRITE, NULL) ||
+		    permission(inode, MAY_WRITE, NULL, NULL) ||
 		    !S_ISREG(inode->i_mode) ||
 		    get_write_access(inode)) {
 			spin_unlock(&dentry->d_lock);
diff -Nurap linux-2.6.9-100.orig/fs/hugetlbfs/inode.c linux-2.6.9-ve023stab054/fs/hugetlbfs/inode.c
--- linux-2.6.9-100.orig/fs/hugetlbfs/inode.c	2011-06-09 19:22:45.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/hugetlbfs/inode.c	2011-06-15 19:26:19.000000000 +0400
@@ -224,6 +224,7 @@ static void hugetlbfs_delete_inode(struc
 	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(inode->i_sb);
 
 	hlist_del_init(&inode->i_hash);
+	list_del(&inode->i_sb_list);
 	list_del_init(&inode->i_list);
 	inode->i_state |= I_FREEING;
 	inodes_stat.nr_inodes--;
@@ -266,6 +267,7 @@ static void hugetlbfs_forget_inode(struc
 	inodes_stat.nr_unused--;
 	hlist_del_init(&inode->i_hash);
 out_truncate:
+	list_del(&inode->i_sb_list);
 	list_del_init(&inode->i_list);
 	inode->i_state |= I_FREEING;
 	inodes_stat.nr_inodes--;
@@ -758,7 +760,7 @@ struct file *hugetlb_zero_setup(size_t s
 	struct inode *inode;
 	struct dentry *dentry, *root;
 	struct qstr quick_string;
-	char buf[16];
+	char buf[64];
 
 	if (!can_do_hugetlb_shm())
 		return ERR_PTR(-EPERM);
@@ -770,7 +772,8 @@ struct file *hugetlb_zero_setup(size_t s
 		return ERR_PTR(-ENOMEM);
 
 	root = hugetlbfs_vfsmount->mnt_root;
-	snprintf(buf, 16, "%lu", hugetlbfs_counter());
+	snprintf(buf, sizeof(buf), "VE%d-%d",
+			get_exec_env()->veid, hugetlbfs_counter());
 	quick_string.name = buf;
 	quick_string.len = strlen(quick_string.name);
 	quick_string.hash = 0;
diff -Nurap linux-2.6.9-100.orig/fs/inode.c linux-2.6.9-ve023stab054/fs/inode.c
--- linux-2.6.9-100.orig/fs/inode.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/inode.c	2011-06-15 19:26:22.000000000 +0400
@@ -9,8 +9,10 @@
 #include <linux/mm.h>
 #include <linux/dcache.h>
 #include <linux/init.h>
+#include <linux/kernel_stat.h>
 #include <linux/quotaops.h>
 #include <linux/slab.h>
+#include <linux/kmem_cache.h>
 #include <linux/writeback.h>
 #include <linux/module.h>
 #include <linux/backing-dev.h>
@@ -22,6 +24,8 @@
 #include <linux/cdev.h>
 #include <linux/bootmem.h>
 #include <linux/audit.h>
+#include <linux/mount.h>
+#include <linux/namespace.h>
 
 /*
  * This is needed for the following functions:
@@ -98,13 +102,15 @@ DECLARE_MUTEX(iprune_sem);
  */
 struct inodes_stat_t inodes_stat;
 
-static kmem_cache_t * inode_cachep;
+kmem_cache_t * inode_cachep;
+
+static struct address_space_operations vfs_empty_aops;
+struct inode_operations vfs_empty_iops;
+static struct file_operations vfs_empty_fops;
+EXPORT_SYMBOL(vfs_empty_iops);
 
 static struct inode *alloc_inode(struct super_block *sb)
 {
-	static struct address_space_operations empty_aops;
-	static struct inode_operations empty_iops;
-	static struct file_operations empty_fops;
 	struct inode *inode;
 
 	if (sb->s_op->alloc_inode)
@@ -120,8 +126,8 @@ static struct inode *alloc_inode(struct 
 		inode->i_flags = 0;
 		atomic_set(&inode->i_count, 1);
 		inode->i_sock = 0;
-		inode->i_op = &empty_iops;
-		inode->i_fop = &empty_fops;
+		inode->i_op = &vfs_empty_iops;
+		inode->i_fop = &vfs_empty_fops;
 		inode->i_nlink = 1;
 		atomic_set(&inode->i_writecount, 0);
 		inode->i_size = 0;
@@ -145,7 +151,7 @@ static struct inode *alloc_inode(struct 
 			return NULL;
 		}
 
-		mapping->a_ops = &empty_aops;
+		mapping->a_ops = &vfs_empty_aops;
  		mapping->host = inode;
 		mapping->flags = 0;
 		mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
@@ -177,6 +183,7 @@ void destroy_inode(struct inode *inode) 
 		BUG();
 	audit_inode_free(inode);
 	security_inode_free(inode);
+	inode->i_op = (struct inode_operations *)0xbad3ae;
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
@@ -304,10 +311,11 @@ static void dispose_list(struct list_hea
 /*
  * Invalidate all inodes for a device.
  */
-static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose, int pages)
+static int invalidate_list(struct list_head *head, struct list_head * dispose,
+			int pages, int verify)
 {
 	struct list_head *next;
-	int busy = 0, count = 0;
+	int busy = 0, count = 0, print_once = 1;
 	struct inode * inode;
 	struct inode * toput_inode = NULL;
 
@@ -318,9 +326,7 @@ static int invalidate_list(struct list_h
 		next = next->next;
 		if (tmp == head)
 			break;
-		inode = list_entry(tmp, struct inode, i_list);
-		if (inode->i_sb != sb)
-			continue;
+		inode = list_entry(tmp, struct inode, i_sb_list);
 		if (pages) {
 			__iget(inode);
 			spin_unlock(&inode_lock);
@@ -331,6 +337,7 @@ static int invalidate_list(struct list_h
 		} else {
 			invalidate_inode_buffers(inode);
 			if (!atomic_read(&inode->i_count)) {
+				list_del(&inode->i_sb_list);
 				list_move(&inode->i_list, dispose);
 				inode->i_state |= I_FREEING;
 				count++;
@@ -338,6 +345,74 @@ static int invalidate_list(struct list_h
 			}
 		}
 		busy = 1;
+
+		if (!verify)
+			continue;
+
+		if (print_once) {
+			struct super_block *sb = inode->i_sb;
+			printk("VFS: Busy inodes after unmount. "
+				"sb = %p, fs type = %s, sb count = %d, "
+				"sb->s_root = %s, pages = %d\n", sb,
+				(sb->s_type != NULL) ? sb->s_type->name : "",
+				sb->s_count,
+				(sb->s_root != NULL) ?
+				(char *)sb->s_root->d_name.name : "",
+				pages);
+			print_once = 0;
+		}
+
+		{
+			struct dentry *d;
+			int i;
+
+			printk("inode = %p, inode->i_count = %d, "
+					"inode->i_nlink = %d, "
+					"inode->i_mode = %d, "
+					"inode->i_state = %ld, "
+					"inode->i_flags = %d, "
+					"inode->i_devices.next = %p, "
+					"inode->i_devices.prev = %p, "
+					"inode->i_ino = %ld\n",
+					inode,
+					atomic_read(&inode->i_count),
+					inode->i_nlink,
+					inode->i_mode,
+					inode->i_state,
+					inode->i_flags,
+					inode->i_devices.next,
+					inode->i_devices.prev,
+					inode->i_ino);
+			printk("inode dump: ");
+			for (i = 0; i < sizeof(*inode); i++)
+				printk("%2.2x ", *((u_char *)inode + i));
+			printk("\n");
+			list_for_each_entry(d, &inode->i_dentry, d_alias) {
+				printk("  d_alias %s d_count=%d d_flags=%x\n",
+					d->d_name.name,
+					atomic_read(&d->d_count), d->d_flags);
+				for (i = 0; i < sizeof(*d); i++)
+					printk("%2.2x ", *((u_char *)d + i));
+				printk("\n");
+			}
+
+			spin_lock(&vfsmount_lock);
+			list_for_each(tmp, &current->namespace->list) {
+				struct vfsmount *mnt;
+				mnt = list_entry(tmp,
+						struct vfsmount, mnt_list);
+				if (mnt->mnt_sb != inode->i_sb)
+					continue;
+				printk("mnt=%p count=%d flags=%x exp_mask=%x\n",
+					mnt, atomic_read(&mnt->mnt_count),
+					mnt->mnt_flags,
+					mnt->mnt_expiry_mark);
+				for (i = 0; i < sizeof(*mnt); i++)
+					printk("%2.2x ", *((u_char *)mnt + i));
+				printk("\n");
+			}
+			spin_unlock(&vfsmount_lock);
+		}
 	}
 	if (pages) {
 		spin_unlock(&inode_lock);
@@ -349,17 +424,14 @@ static int invalidate_list(struct list_h
 	return busy;
 }
 
-int invalidate_inodes_and_pages(struct super_block * sb)
+int invalidate_inodes_and_pages(struct super_block * sb, int verify)
 {
 	int busy;
 	LIST_HEAD(throw_away);
                                                                                                    
 	down(&iprune_sem);
 	spin_lock(&inode_lock);
-	busy = invalidate_list(&inode_in_use, sb, &throw_away, 1);
-	busy |= invalidate_list(&inode_unused, sb, &throw_away, 1);
-	busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, 1);
-	busy |= invalidate_list(&sb->s_io, sb, &throw_away, 1);
+	busy = invalidate_list(&sb->s_inodes, &throw_away, 1, verify);
 	spin_unlock(&inode_lock);
 	up(&iprune_sem);
 	return busy;
@@ -381,17 +453,14 @@ int invalidate_inodes_and_pages(struct s
  *	fails because there are busy inodes then a non zero value is returned.
  *	If the discard is successful all the inodes have been discarded.
  */
-int invalidate_inodes(struct super_block * sb)
+int invalidate_inodes(struct super_block * sb, int verify)
 {
 	int busy;
 	LIST_HEAD(throw_away);
 
 	down(&iprune_sem);
 	spin_lock(&inode_lock);
-	busy = invalidate_list(&inode_in_use, sb, &throw_away, 0);
-	busy |= invalidate_list(&inode_unused, sb, &throw_away, 0);
-	busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, 0);
-	busy |= invalidate_list(&sb->s_io, sb, &throw_away, 0);
+	busy = invalidate_list(&sb->s_inodes, &throw_away, 0, verify);
 	spin_unlock(&inode_lock);
 
 	dispose_list(&throw_away);
@@ -420,7 +489,7 @@ int __invalidate_device(struct block_dev
 		 * hold).
 		 */
 		shrink_dcache_sb(sb);
-		res = invalidate_inodes(sb);
+		res = invalidate_inodes(sb, 0);
 		drop_super(sb);
 	}
 	invalidate_bdev(bdev, 0);
@@ -490,6 +559,7 @@ static void prune_icache(int nr_to_scan)
 			if (!can_unuse(inode))
 				continue;
 		}
+		list_del(&inode->i_sb_list);
 		list_move(&inode->i_list, &freeable);
 		inode->i_state |= I_FREEING;
 		nr_pruned++;
@@ -517,6 +587,7 @@ static void prune_icache(int nr_to_scan)
  */
 static int shrink_icache_memory(int nr, unsigned int gfp_mask)
 {
+	KSTAT_PERF_ENTER(shrink_icache)
 	if (nr) {
 		/*
 		 * Nasty deadlock avoidance.  We may hold various FS locks,
@@ -527,6 +598,7 @@ static int shrink_icache_memory(int nr, 
 			return -1;
 		prune_icache(nr);
 	}
+	KSTAT_PERF_LEAVE(shrink_icache)
 	return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
 
@@ -601,6 +673,7 @@ struct inode *new_inode(struct super_blo
 	if (inode) {
 		spin_lock(&inode_lock);
 		inodes_stat.nr_inodes++;
+		list_add(&inode->i_sb_list, &sb->s_inodes);
 		list_add(&inode->i_list, &inode_in_use);
 		inode->i_ino = ++last_ino;
 		inode->i_state = 0;
@@ -658,6 +731,7 @@ static struct inode * get_new_inode(stru
 				goto set_failed;
 
 			inodes_stat.nr_inodes++;
+			list_add(&inode->i_sb_list, &sb->s_inodes);
 			list_add(&inode->i_list, &inode_in_use);
 			hlist_add_head(&inode->i_hash, head);
 			inode->i_state = I_LOCK|I_NEW;
@@ -706,6 +780,7 @@ static struct inode * get_new_inode_fast
 		if (!old) {
 			inode->i_ino = ino;
 			inodes_stat.nr_inodes++;
+			list_add(&inode->i_sb_list, &sb->s_inodes);
 			list_add(&inode->i_list, &inode_in_use);
 			hlist_add_head(&inode->i_hash, head);
 			inode->i_state = I_LOCK|I_NEW;
@@ -1121,6 +1196,7 @@ void generic_delete_inode(struct inode *
 {
 	struct super_operations *op = inode->i_sb->s_op;
 
+	list_del(&inode->i_sb_list);
 	list_del_init(&inode->i_list);
 	inode->i_state|=I_FREEING;
 	inodes_stat.nr_inodes--;
@@ -1163,6 +1239,7 @@ static void generic_forget_inode(struct 
 			return;
 		}
 		inode->i_state |= I_WILL_FREE;
+		BUG_ON(inode->i_state & I_LOCK);
 		spin_unlock(&inode_lock);
 		write_inode_now(inode, 1);
 		spin_lock(&inode_lock);
@@ -1170,6 +1247,7 @@ static void generic_forget_inode(struct 
 		inodes_stat.nr_unused--;
 		hlist_del_init(&inode->i_hash);
 	}
+	list_del(&inode->i_sb_list);
 	list_del_init(&inode->i_list);
 	inode->i_state |= I_FREEING;
 	inodes_stat.nr_inodes--;
@@ -1351,33 +1429,15 @@ int remove_inode_dquot_ref(struct inode 
 void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head)
 {
 	struct inode *inode;
-	struct list_head *act_head;
 
 	if (!sb->dq_op)
 		return;	/* nothing to do */
-	spin_lock(&inode_lock);	/* This lock is for inodes code */
 
+	spin_lock(&inode_lock);	/* This lock is for inodes code */
 	/* We hold dqptr_sem so we are safe against the quota code */
-	list_for_each(act_head, &inode_in_use) {
-		inode = list_entry(act_head, struct inode, i_list);
-		if (inode->i_sb == sb && !IS_NOQUOTA(inode))
-			remove_inode_dquot_ref(inode, type, tofree_head);
-	}
-	list_for_each(act_head, &inode_unused) {
-		inode = list_entry(act_head, struct inode, i_list);
-		if (inode->i_sb == sb && !IS_NOQUOTA(inode))
-			remove_inode_dquot_ref(inode, type, tofree_head);
-	}
-	list_for_each(act_head, &sb->s_dirty) {
-		inode = list_entry(act_head, struct inode, i_list);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list)
 		if (!IS_NOQUOTA(inode))
 			remove_inode_dquot_ref(inode, type, tofree_head);
-	}
-	list_for_each(act_head, &sb->s_io) {
-		inode = list_entry(act_head, struct inode, i_list);
-		if (!IS_NOQUOTA(inode))
-			remove_inode_dquot_ref(inode, type, tofree_head);
-	}
 	spin_unlock(&inode_lock);
 }
 
@@ -1493,7 +1553,8 @@ void __init inode_init(unsigned long mem
 
 	/* inode slab cache */
 	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
-				0, SLAB_PANIC, init_once, NULL);
+				0, SLAB_RECLAIM_ACCOUNT | SLAB_PANIC,
+				init_once, NULL);
 	set_shrinker(DEFAULT_SEEKS, shrink_icache_memory);
 	audit_filesystem_init();
 }
diff -Nurap linux-2.6.9-100.orig/fs/isofs/compress.c linux-2.6.9-ve023stab054/fs/isofs/compress.c
--- linux-2.6.9-100.orig/fs/isofs/compress.c	2004-10-19 01:53:13.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/isofs/compress.c	2011-06-15 19:26:18.000000000 +0400
@@ -147,8 +147,14 @@ static int zisofs_readpage(struct file *
 	cend = le32_to_cpu(*(__le32 *)(bh->b_data + (blockendptr & bufmask)));
 	brelse(bh);
 
+	if (cstart > cend)
+		goto eio;
+		
 	csize = cend-cstart;
 
+	if (csize > deflateBound(1UL << zisofs_block_shift))
+		goto eio;
+
 	/* Now page[] contains an array of pages, any of which can be NULL,
 	   and the locks on which we hold.  We should now read the data and
 	   release the pages.  If the pages are NULL the decompressed data
diff -Nurap linux-2.6.9-100.orig/fs/jbd/commit.c linux-2.6.9-ve023stab054/fs/jbd/commit.c
--- linux-2.6.9-100.orig/fs/jbd/commit.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/jbd/commit.c	2011-06-15 19:26:19.000000000 +0400
@@ -188,7 +188,7 @@ write_out_data:
 			__journal_file_buffer(jh, commit_transaction,
 						BJ_Locked);
 			jbd_unlock_bh_state(bh);
-			if (bufs == ARRAY_SIZE(wbuf)) {
+			if (bufs == journal->j_wbufsize) {
 				spin_unlock(&journal->j_list_lock);
 				journal_do_submit_data(wbuf, bufs);
 				bufs = 0;
@@ -231,7 +231,7 @@ void journal_commit_transaction(journal_
 {
 	transaction_t *commit_transaction;
 	struct journal_head *jh, *new_jh, *descriptor;
-	struct buffer_head *wbuf[64];
+	struct buffer_head **wbuf = journal->j_wbuf;
 	int bufs;
 	int flags;
 	int err;
@@ -377,8 +377,6 @@ void journal_commit_transaction(journal_
 		if (buffer_locked(bh)) {
 			spin_unlock(&journal->j_list_lock);
 			wait_on_buffer(bh);
-			if (unlikely(!buffer_uptodate(bh)))
-				err = -EIO;
 			spin_lock(&journal->j_list_lock);
 		}
 		if (!inverted_lock(journal, bh)) {
@@ -435,6 +433,7 @@ void journal_commit_transaction(journal_
 	descriptor = NULL;
 	bufs = 0;
 	while (commit_transaction->t_buffers) {
+		int error;
 
 		/* Find the next buffer to be journaled... */
 
@@ -495,11 +494,12 @@ void journal_commit_transaction(journal_
 
 		/* Where is the buffer to be written? */
 
-		err = journal_next_log_block(journal, &blocknr);
+		error = journal_next_log_block(journal, &blocknr);
 		/* If the block mapping failed, just abandon the buffer
 		   and repeat this loop: we'll fall into the
 		   refile-on-abort condition above. */
-		if (err) {
+		if (error) {
+			err = error;
 			__journal_abort_hard(journal);
 			continue;
 		}
@@ -558,7 +558,7 @@ void journal_commit_transaction(journal_
 		/* If there's no more to do, or if the descriptor is full,
 		   let the IO rip! */
 
-		if (bufs == ARRAY_SIZE(wbuf) ||
+		if (bufs == journal->j_wbufsize ||
 		    commit_transaction->t_buffers == NULL ||
 		    space_left < sizeof(journal_block_tag_t) + 16) {
 
@@ -685,6 +685,8 @@ wait_for_iobuf:
 
 	jbd_debug(3, "JBD: commit phase 6\n");
 
+	if (err)
+		goto skip_commit;
 	if (is_journal_aborted(journal))
 		goto skip_commit;
 
@@ -755,8 +757,13 @@ wait_for_iobuf:
 
 skip_commit: /* The journal should be unlocked by now. */
 
-	if (err)
+	if (err) {
+		char b[BDEVNAME_SIZE];
+
+		printk(KERN_ERR "Error %d writing journal on %s\n",
+				err, bdevname(journal->j_dev, b));
 		__journal_abort_hard(journal);
+	}
 
 	/*
 	 * Call any callbacks that had been registered for handles in this
diff -Nurap linux-2.6.9-100.orig/fs/jbd/journal.c linux-2.6.9-ve023stab054/fs/jbd/journal.c
--- linux-2.6.9-100.orig/fs/jbd/journal.c	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/jbd/journal.c	2011-06-15 19:26:19.000000000 +0400
@@ -153,6 +153,9 @@ int kjournald(void *arg)
 	spin_lock(&journal->j_state_lock);
 
 loop:
+	if (journal->j_flags & JFS_UNMOUNT)
+		goto end_loop;
+
 	jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
 		journal->j_commit_sequence, journal->j_commit_request);
 
@@ -162,11 +165,11 @@ loop:
 		del_timer_sync(journal->j_commit_timer);
 		journal_commit_transaction(journal);
 		spin_lock(&journal->j_state_lock);
-		goto end_loop;
+		goto loop;
 	}
 
 	wake_up(&journal->j_wait_done_commit);
-	if (current->flags & PF_FREEZE) {
+	if (test_thread_flag(TIF_FREEZE)) {
 		/*
 		 * The simpler the better. Flushing journal isn't a
 		 * good idea, because that depends on threads that may
@@ -174,7 +177,7 @@ loop:
 		 */
 		jbd_debug(1, "Now suspending kjournald\n");
 		spin_unlock(&journal->j_state_lock);
-		refrigerator(PF_FREEZE);
+		refrigerator();
 		spin_lock(&journal->j_state_lock);
 	} else {
 		/*
@@ -212,10 +215,9 @@ loop:
 		journal->j_commit_request = transaction->t_tid;
 		jbd_debug(1, "woke because of timeout\n");
 	}
-end_loop:
-	if (!(journal->j_flags & JFS_UNMOUNT))
-		goto loop;
+	goto loop;
 
+end_loop:
 	spin_unlock(&journal->j_state_lock);
 	del_timer_sync(journal->j_commit_timer);
 	journal->j_task = NULL;
@@ -224,10 +226,16 @@ end_loop:
 	return 0;
 }
 
-static void journal_start_thread(journal_t *journal)
+static int journal_start_thread(journal_t *journal)
 {
-	kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES);
+	int err;
+
+	err = kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES);
+	if (err < 0)
+		return err;
+
 	wait_event(journal->j_wait_done_commit, journal->j_task != 0);
+	return 0;
 }
 
 static void journal_kill_thread(journal_t *journal)
@@ -723,6 +731,7 @@ journal_t * journal_init_dev(struct bloc
 {
 	journal_t *journal = journal_init_common();
 	struct buffer_head *bh;
+	int n;
 
 	if (!journal)
 		return NULL;
@@ -738,6 +747,17 @@ journal_t * journal_init_dev(struct bloc
 	journal->j_sb_buffer = bh;
 	journal->j_superblock = (journal_superblock_t *)bh->b_data;
 
+	/* journal descriptor can store up to n blocks -bzzz */
+	n = journal->j_blocksize / sizeof(journal_block_tag_t);
+	journal->j_wbufsize = n;
+	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
+	if (!journal->j_wbuf) {
+		printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
+			__FUNCTION__);
+		kfree(journal);
+		journal = NULL;
+	}
+
 	return journal;
 }
  
@@ -754,6 +774,7 @@ journal_t * journal_init_inode (struct i
 	struct buffer_head *bh;
 	journal_t *journal = journal_init_common();
 	int err;
+	int n;
 	unsigned long blocknr;
 
 	if (!journal)
@@ -770,6 +791,17 @@ journal_t * journal_init_inode (struct i
 	journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
 	journal->j_blocksize = inode->i_sb->s_blocksize;
 
+	/* journal descriptor can store up to n blocks -bzzz */
+	n = journal->j_blocksize / sizeof(journal_block_tag_t);
+	journal->j_wbufsize = n;
+	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
+	if (!journal->j_wbuf) {
+		printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
+			__FUNCTION__);
+		kfree(journal);
+		return NULL;
+	}
+
 	err = journal_bmap(journal, 0, &blocknr);
 	/* If that failed, give up */
 	if (err) {
@@ -829,8 +861,7 @@ static int journal_reset(journal_t *jour
 
 	/* Add the dynamic fields and write it to disk. */
 	journal_update_superblock(journal, 1);
-	journal_start_thread(journal);
-	return 0;
+	return journal_start_thread(journal);
 }
 
 /** 
@@ -1143,6 +1174,7 @@ void journal_destroy(journal_t *journal)
 		iput(journal->j_inode);
 	if (journal->j_revoke)
 		journal_destroy_revoke(journal);
+	kfree(journal->j_wbuf);
 	kfree(journal);
 }
 
@@ -1493,7 +1525,7 @@ void __journal_abort_soft (journal_t *jo
  * entered abort state during the update.
  *
  * Recursive transactions are not disturbed by journal abort until the
- * final journal_stop, which will receive the -EIO error.
+ * final journal_stop.
  *
  * Finally, the journal_abort call allows the caller to supply an errno
  * which will be recorded (if possible) in the journal superblock.  This
diff -Nurap linux-2.6.9-100.orig/fs/jbd/transaction.c linux-2.6.9-ve023stab054/fs/jbd/transaction.c
--- linux-2.6.9-100.orig/fs/jbd/transaction.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/jbd/transaction.c	2011-06-15 19:26:22.000000000 +0400
@@ -493,23 +493,21 @@ void journal_unlock_updates (journal_t *
  */
 static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
 {
-	struct buffer_head *bh = jh2bh(jh);
 	int jlist;
 
-	if (buffer_dirty(bh)) {
-		/* If this buffer is one which might reasonably be dirty
-		 * --- ie. data, or not part of this journal --- then
-		 * we're OK to leave it alone, but otherwise we need to
-		 * move the dirty bit to the journal's own internal
-		 * JBDDirty bit. */
-		jlist = jh->b_jlist;
-
-		if (jlist == BJ_Metadata || jlist == BJ_Reserved || 
-		    jlist == BJ_Shadow || jlist == BJ_Forget) {
-			if (test_clear_buffer_dirty(jh2bh(jh))) {
-				set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
-			}
-		}
+	/* If this buffer is one which might reasonably be dirty
+	 * --- ie. data, or not part of this journal --- then
+	 * we're OK to leave it alone, but otherwise we need to
+	 * move the dirty bit to the journal's own internal
+	 * JBDDirty bit. */
+	jlist = jh->b_jlist;
+
+	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
+	    jlist == BJ_Shadow || jlist == BJ_Forget) {
+		struct buffer_head *bh = jh2bh(jh);
+
+		if (test_clear_buffer_dirty(bh))
+			set_buffer_jbddirty(bh);
 	}
 }
 
@@ -577,9 +575,14 @@ repeat:
 			if (jh->b_next_transaction)
 				J_ASSERT_JH(jh, jh->b_next_transaction ==
 							transaction);
-			JBUFFER_TRACE(jh, "Unexpected dirty buffer");
-			jbd_unexpected_dirty_buffer(jh);
- 		}
+		}
+		/*
+		 * In any case we need to clean the dirty flag and we must
+		 * do it under the buffer lock to be sure we don't race
+		 * with running write-out.
+		 */
+		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
+		jbd_unexpected_dirty_buffer(jh);
  	}
 
 	unlock_buffer(bh);
@@ -1456,7 +1459,8 @@ int journal_stop(handle_t *handle)
 		 * Special case: JFS_SYNC synchronous updates require us
 		 * to wait for the commit to complete.  
 		 */
-		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
+		if (handle->h_sync && !(current->flags & PF_MEMALLOC) &&
+			       !test_thread_flag(TIF_MEMDIE))
 			err = log_wait_commit(journal, tid);
 	} else {
 		spin_unlock(&transaction->t_handle_lock);
diff -Nurap linux-2.6.9-100.orig/fs/jffs2/background.c linux-2.6.9-ve023stab054/fs/jffs2/background.c
--- linux-2.6.9-100.orig/fs/jffs2/background.c	2004-10-19 01:53:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/jffs2/background.c	2011-06-15 19:26:18.000000000 +0400
@@ -93,8 +93,8 @@ static int jffs2_garbage_collect_thread(
 			schedule();
 		}
 
-		if (current->flags & PF_FREEZE) {
-			refrigerator(0);
+		if (test_thread_flag(TIF_FREEZE)) {
+			refrigerator();
 			/* refrigerator() should recalc sigpending for us
 			   but doesn't. No matter - allow_signal() will. */
 			continue;
diff -Nurap linux-2.6.9-100.orig/fs/jfs/acl.c linux-2.6.9-ve023stab054/fs/jfs/acl.c
--- linux-2.6.9-100.orig/fs/jfs/acl.c	2004-10-19 01:53:10.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/jfs/acl.c	2011-06-15 19:26:19.000000000 +0400
@@ -128,7 +128,7 @@ out:
  *
  * modified vfs_permission to check posix acl
  */
-int jfs_permission(struct inode * inode, int mask, struct nameidata *nd)
+int __jfs_permission(struct inode * inode, int mask)
 {
 	umode_t mode = inode->i_mode;
 	struct jfs_inode_info *ji = JFS_IP(inode);
@@ -207,6 +207,28 @@ check_capabilities:
 	return -EACCES;
 }
 
+int jfs_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *exec_perm)
+{
+	int ret;
+
+	if (exec_perm != NULL)
+		down(&inode->i_sem);
+
+	ret = __jfs_permission(inode, mask);
+
+	if (exec_perm != NULL) {
+		if (!ret) {
+			exec_perm->set = 1;
+			exec_perm->mode = inode->i_mode;
+			exec_perm->uid = inode->i_uid;
+			exec_perm->gid = inode->i_gid;
+		}
+		up(&inode->i_sem);
+	}
+	return ret;
+}
+
 int jfs_init_acl(struct inode *inode, struct inode *dir)
 {
 	struct posix_acl *acl = NULL;
diff -Nurap linux-2.6.9-100.orig/fs/jfs/jfs_acl.h linux-2.6.9-ve023stab054/fs/jfs/jfs_acl.h
--- linux-2.6.9-100.orig/fs/jfs/jfs_acl.h	2004-10-19 01:55:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/jfs/jfs_acl.h	2011-06-15 19:26:19.000000000 +0400
@@ -22,7 +22,7 @@
 
 #include <linux/xattr_acl.h>
 
-int jfs_permission(struct inode *, int, struct nameidata *);
+int jfs_permission(struct inode *, int, struct nameidata *, struct exec_perm *);
 int jfs_init_acl(struct inode *, struct inode *);
 int jfs_setattr(struct dentry *, struct iattr *);
 
diff -Nurap linux-2.6.9-100.orig/fs/jfs/jfs_logmgr.c linux-2.6.9-ve023stab054/fs/jfs/jfs_logmgr.c
--- linux-2.6.9-100.orig/fs/jfs/jfs_logmgr.c	2004-10-19 01:54:20.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/jfs/jfs_logmgr.c	2011-06-15 19:26:18.000000000 +0400
@@ -2328,9 +2328,9 @@ int jfsIOWait(void *arg)
 			lbmStartIO(bp);
 			spin_lock_irq(&log_redrive_lock);
 		}
-		if (current->flags & PF_FREEZE) {
+		if (test_thread_flag(TIF_FREEZE)) {
 			spin_unlock_irq(&log_redrive_lock);
-			refrigerator(PF_FREEZE);
+			refrigerator();
 		} else {
 			add_wait_queue(&jfs_IO_thread_wait, &wq);
 			set_current_state(TASK_INTERRUPTIBLE);
diff -Nurap linux-2.6.9-100.orig/fs/jfs/jfs_txnmgr.c linux-2.6.9-ve023stab054/fs/jfs/jfs_txnmgr.c
--- linux-2.6.9-100.orig/fs/jfs/jfs_txnmgr.c	2004-10-19 01:54:32.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/jfs/jfs_txnmgr.c	2011-06-15 19:26:18.000000000 +0400
@@ -2773,9 +2773,9 @@ int jfs_lazycommit(void *arg)
 				break;
 		}
 
-		if (current->flags & PF_FREEZE) {
+		if (test_thread_flag(TIF_FREEZE)) {
 			LAZY_UNLOCK(flags);
-			refrigerator(PF_FREEZE);
+			refrigerator();
 		} else {
 			DECLARE_WAITQUEUE(wq, current);
 
@@ -2984,9 +2984,9 @@ int jfs_sync(void *arg)
 		/* Add anon_list2 back to anon_list */
 		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
 
-		if (current->flags & PF_FREEZE) {
+		if (test_thread_flag(TIF_FREEZE)) {
 			TXN_UNLOCK();
-			refrigerator(PF_FREEZE);
+			refrigerator();
 		} else {
 			DECLARE_WAITQUEUE(wq, current);
 
diff -Nurap linux-2.6.9-100.orig/fs/jfs/xattr.c linux-2.6.9-ve023stab054/fs/jfs/xattr.c
--- linux-2.6.9-100.orig/fs/jfs/xattr.c	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/jfs/xattr.c	2011-06-15 19:26:19.000000000 +0400
@@ -778,7 +778,7 @@ static int can_set_xattr(struct inode *i
 	    (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX))
 		return -EPERM;
 
-	return permission(inode, MAY_WRITE, NULL);
+	return permission(inode, MAY_WRITE, NULL, NULL);
 }
 
 int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
@@ -939,7 +939,7 @@ static int can_get_xattr(struct inode *i
 {
 	if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0)
 		return 0;
-	return permission(inode, MAY_READ, NULL);
+	return permission(inode, MAY_READ, NULL, NULL);
 }
 
 ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
diff -Nurap linux-2.6.9-100.orig/fs/libfs.c linux-2.6.9-ve023stab054/fs/libfs.c
--- linux-2.6.9-100.orig/fs/libfs.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/libfs.c	2011-06-15 19:26:19.000000000 +0400
@@ -426,10 +426,13 @@ static spinlock_t pin_fs_lock = SPIN_LOC
 int simple_pin_fs(char *name, struct vfsmount **mount, int *count)
 {
 	struct vfsmount *mnt = NULL;
+	struct file_system_type *fstype;
 	spin_lock(&pin_fs_lock);
 	if (unlikely(!*mount)) {
 		spin_unlock(&pin_fs_lock);
-		mnt = do_kern_mount(name, 0, name, NULL);
+		fstype = get_fs_type(name);
+		mnt = do_kern_mount(fstype, 0, name, NULL);
+		put_filesystem(fstype);
 		if (IS_ERR(mnt))
 			return PTR_ERR(mnt);
 		spin_lock(&pin_fs_lock);
diff -Nurap linux-2.6.9-100.orig/fs/lockd/clntproc.c linux-2.6.9-ve023stab054/fs/lockd/clntproc.c
--- linux-2.6.9-100.orig/fs/lockd/clntproc.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/lockd/clntproc.c	2011-06-15 19:26:19.000000000 +0400
@@ -127,10 +127,10 @@ static void nlmclnt_setlockargs(struct n
 	nlmclnt_next_cookie(&argp->cookie);
 	argp->state   = nsm_local_state;
 	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
-	lock->caller  = system_utsname.nodename;
+	lock->caller  = ve_utsname.nodename;
 	lock->oh.data = req->a_owner;
 	lock->oh.len  = sprintf(req->a_owner, "%d@%s",
-				current->tgid, system_utsname.nodename);
+				current->tgid, ve_utsname.nodename);
 	locks_copy_lock(&lock->fl, fl);
 }
 
@@ -151,7 +151,7 @@ nlmclnt_setgrantargs(struct nlm_rqst *ca
 {
 	locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
 	memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
-	call->a_args.lock.caller = system_utsname.nodename;
+	call->a_args.lock.caller = ve_utsname.nodename;
 	call->a_args.lock.oh.len = lock->oh.len;
 
 	/* set default data area */
diff -Nurap linux-2.6.9-100.orig/fs/lockd/mon.c linux-2.6.9-ve023stab054/fs/lockd/mon.c
--- linux-2.6.9-100.orig/fs/lockd/mon.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/lockd/mon.c	2011-06-15 19:26:19.000000000 +0400
@@ -149,7 +149,7 @@ xdr_encode_common(struct rpc_rqst *rqstp
 	 */
 	sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
 	if (!(p = xdr_encode_string(p, buffer))
-	 || !(p = xdr_encode_string(p, system_utsname.nodename)))
+	 || !(p = xdr_encode_string(p, ve_utsname.nodename)))
 		return ERR_PTR(-EIO);
 	*p++ = htonl(argp->prog);
 	*p++ = htonl(argp->vers);
diff -Nurap linux-2.6.9-100.orig/fs/locks.c linux-2.6.9-ve023stab054/fs/locks.c
--- linux-2.6.9-100.orig/fs/locks.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/locks.c	2011-06-15 19:26:21.000000000 +0400
@@ -127,6 +127,8 @@
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
 
+#include <ub/ub_misc.h>
+
 #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
 #define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
 #define IS_LEASE(fl)	(fl->fl_flags & FL_LEASE)
@@ -146,9 +148,23 @@ static LIST_HEAD(blocked_list);
 static kmem_cache_t *filelock_cache;
 
 /* Allocate an empty lock structure. */
-static struct file_lock *locks_alloc_lock(void)
+static struct file_lock *locks_alloc_lock(int charge)
 {
-	return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
+	struct file_lock *flock;
+
+	flock = kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
+	if (flock == NULL)
+		goto out;
+	flock->fl_charged = 0;
+	if (!charge)
+		goto out;
+	if (!ub_flock_charge(flock, 1))
+		goto out;
+
+	kmem_cache_free(filelock_cache, flock);
+	flock = NULL;
+out:
+	return flock;
 }
 
 /* Free a lock which is not in use. */
@@ -174,6 +190,7 @@ static inline void locks_free_lock(struc
 	}
 	fl->fl_lmops = NULL;
 
+	ub_flock_uncharge(fl);
 	kmem_cache_free(filelock_cache, fl);
 }
 
@@ -253,8 +270,8 @@ static int flock_make_lock(struct file *
 	int type = flock_translate_cmd(cmd);
 	if (type < 0)
 		return type;
-	
-	fl = locks_alloc_lock();
+
+	fl = locks_alloc_lock(type != F_UNLCK);
 	if (fl == NULL)
 		return -ENOMEM;
 
@@ -386,7 +403,7 @@ static int flock64_to_posix_lock(struct 
 /* Allocate a file_lock initialised to this type of lease */
 static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
 {
-	struct file_lock *fl = locks_alloc_lock();
+	struct file_lock *fl = locks_alloc_lock(1);
 	if (fl == NULL)
 		return -ENOMEM;
 
@@ -705,7 +722,7 @@ static int flock_lock_file(struct file *
 		goto out;
 
 	error = -ENOMEM;
-	new_fl = locks_alloc_lock();
+	new_fl = locks_alloc_lock(0);
 	if (new_fl == NULL)
 		goto out;
 	/*
@@ -730,6 +747,10 @@ static int flock_lock_file(struct file *
 	}
 	locks_copy_lock(new_fl, request);
 	locks_insert_lock(&inode->i_flock, new_fl);
+
+	WARN_ON(!request->fl_charged);
+	new_fl->fl_charged = 1;
+	request->fl_charged = 0;
 	new_fl = NULL;
 	error = 0;
 
@@ -755,8 +776,11 @@ static int __posix_lock_file(struct inod
 	 * We may need two file_lock structures for this operation,
 	 * so we get them in advance to avoid races.
 	 */
-	new_fl = locks_alloc_lock();
-	new_fl2 = locks_alloc_lock();
+	if (request->fl_type != F_UNLCK)
+		new_fl = locks_alloc_lock(1);
+	else
+		new_fl = NULL;
+	new_fl2 = locks_alloc_lock(0);
 
 	lock_kernel();
 	if (request->fl_type != F_UNLCK) {
@@ -784,7 +808,7 @@ static int __posix_lock_file(struct inod
 		goto out;
 
 	error = -ENOLCK; /* "no luck" */
-	if (!(new_fl && new_fl2))
+	if (!((request->fl_type == F_UNLCK || new_fl) && new_fl2))
 		goto out;
 
 	/*
@@ -886,19 +910,29 @@ static int __posix_lock_file(struct inod
 	if (!added) {
 		if (request->fl_type == F_UNLCK)
 			goto out;
+		error = -ENOLCK;
+		if (right && (left == right) && ub_flock_charge(new_fl, 1))
+				goto out;
 		locks_copy_lock(new_fl, request);
 		locks_insert_lock(before, new_fl);
 		new_fl = NULL;
+		error = 0;
 	}
 	if (right) {
 		if (left == right) {
 			/* The new lock breaks the old one in two pieces,
 			 * so we have to use the second new lock.
 			 */
+			error = -ENOLCK;
+			if (added && ub_flock_charge(new_fl2,
+						request->fl_type != F_UNLCK))
+				goto out;
+			new_fl2->fl_charged = 1;
 			left = new_fl2;
 			new_fl2 = NULL;
 			locks_copy_lock(left, right);
 			locks_insert_lock(before, left);
+			error = 0;
 		}
 		right->fl_start = request->fl_end + 1;
 		locks_wake_up_blocks(right);
@@ -1435,6 +1469,7 @@ asmlinkage long sys_flock(unsigned int f
  out:
 	return error;
 }
+EXPORT_SYMBOL(sys_flock);
 
 /* Report the first existing lock that would conflict with l.
  * This implements the F_GETLK command of fcntl().
@@ -1471,7 +1506,7 @@ int fcntl_getlk(struct file *filp, struc
  
 	flock.l_type = F_UNLCK;
 	if (fl != NULL) {
-		flock.l_pid = fl->fl_pid;
+		flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
 #if BITS_PER_LONG == 32
 		/*
 		 * Make sure we can represent the posix lock via
@@ -1503,7 +1538,7 @@ out:
 int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		struct flock __user *l)
 {
-	struct file_lock *file_lock = locks_alloc_lock();
+	struct file_lock *file_lock = locks_alloc_lock(0);
 	struct flock flock;
 	struct inode *inode;
 	struct file *f;
@@ -1636,7 +1671,7 @@ int fcntl_getlk64(struct file *filp, str
  
 	flock.l_type = F_UNLCK;
 	if (fl != NULL) {
-		flock.l_pid = fl->fl_pid;
+		flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
 		flock.l_start = fl->fl_start;
 		flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
 			fl->fl_end - fl->fl_start + 1;
@@ -1657,7 +1692,7 @@ out:
 int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		struct flock64 __user *l)
 {
-	struct file_lock *file_lock = locks_alloc_lock();
+	struct file_lock *file_lock = locks_alloc_lock(1);
 	struct flock64 flock;
 	struct inode *inode;
 	struct file *f;
@@ -1892,7 +1927,9 @@ EXPORT_SYMBOL(posix_unblock_lock);
 static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
 {
 	struct inode *inode = NULL;
+	unsigned int fl_pid;
 
+	fl_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
 	if (fl->fl_file != NULL)
 		inode = fl->fl_file->f_dentry->d_inode;
 
@@ -1934,16 +1971,16 @@ static void lock_get_status(char* out, s
 	}
 	if (inode) {
 #ifdef WE_CAN_BREAK_LSLK_NOW
-		out += sprintf(out, "%d %s:%ld ", fl->fl_pid,
+		out += sprintf(out, "%d %s:%ld ", fl_pid,
 				inode->i_sb->s_id, inode->i_ino);
 #else
 		/* userspace relies on this representation of dev_t ;-( */
-		out += sprintf(out, "%d %02x:%02x:%ld ", fl->fl_pid,
+		out += sprintf(out, "%d %02x:%02x:%ld ", fl_pid,
 				MAJOR(inode->i_sb->s_dev),
 				MINOR(inode->i_sb->s_dev), inode->i_ino);
 #endif
 	} else {
-		out += sprintf(out, "%d <none>:0 ", fl->fl_pid);
+		out += sprintf(out, "%d <none>:0 ", fl_pid);
 	}
 	if (IS_POSIX(fl)) {
 		if (fl->fl_end == OFFSET_MAX)
@@ -1992,11 +2029,17 @@ int get_locks_status(char *buffer, char 
 	char *q = buffer;
 	off_t pos = 0;
 	int i = 0;
+	struct ve_struct *env;
 
 	lock_kernel();
+	env = get_exec_env();
 	list_for_each(tmp, &file_lock_list) {
 		struct list_head *btmp;
 		struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
+
+		if (!ve_accessible(VE_OWNER_FILP(fl->fl_file), env))
+			continue;
+
 		lock_get_status(q, fl, ++i, "");
 		move_lock_status(&q, &pos, offset);
 
@@ -2150,7 +2193,7 @@ EXPORT_SYMBOL(steal_locks);
 static int __init filelock_init(void)
 {
 	filelock_cache = kmem_cache_create("file_lock_cache",
-			sizeof(struct file_lock), 0, SLAB_PANIC,
+			sizeof(struct file_lock), 0, SLAB_PANIC | SLAB_UBC,
 			init_once, NULL);
 	return 0;
 }
diff -Nurap linux-2.6.9-100.orig/fs/mpage.c linux-2.6.9-ve023stab054/fs/mpage.c
--- linux-2.6.9-100.orig/fs/mpage.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/mpage.c	2011-06-15 19:26:18.000000000 +0400
@@ -717,6 +717,8 @@ retry:
 				bio = mpage_writepage(bio, page, get_block,
 						&last_block_in_bio, &ret, wbc);
 			}
+			if (unlikely(ret == WRITEPAGE_ACTIVATE))
+				unlock_page(page);
 			if (ret || (--(wbc->nr_to_write) <= 0))
 				done = 1;
 			if (wbc->nonblocking && bdi_write_congested(bdi)) {
diff -Nurap linux-2.6.9-100.orig/fs/namei.c linux-2.6.9-ve023stab054/fs/namei.c
--- linux-2.6.9-100.orig/fs/namei.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/namei.c	2011-06-15 19:26:21.000000000 +0400
@@ -115,11 +115,12 @@ static inline int do_getname(const char 
 	int retval;
 	unsigned long len = PATH_MAX;
 
-	if ((unsigned long) filename >= TASK_SIZE) {
-		if (!segment_eq(get_fs(), KERNEL_DS))
+	if (!segment_eq(get_fs(), KERNEL_DS)) {
+		if ((unsigned long) filename >= TASK_SIZE)
 			return -EFAULT;
-	} else if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
-		len = TASK_SIZE - (unsigned long) filename;
+		if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
+			len = TASK_SIZE - (unsigned long) filename;
+	}
 
 	retval = strncpy_from_user((char *)page, filename, len);
 	if (retval > 0) {
@@ -135,6 +136,7 @@ char * getname(const char __user * filen
 {
 	char *tmp, *result;
 
+	ub_dentry_checkup();
 	result = ERR_PTR(-ENOMEM);
 	tmp = __getname();
 	if (tmp)  {
@@ -159,7 +161,7 @@ char * getname(const char __user * filen
  * for filesystem access without changing the "normal" uids which
  * are used for other things..
  */
-int vfs_permission(struct inode * inode, int mask)
+int __vfs_permission(struct inode * inode, int mask)
 {
 	umode_t			mode = inode->i_mode;
 
@@ -208,7 +210,29 @@ int vfs_permission(struct inode * inode,
 	return -EACCES;
 }
 
-int permission(struct inode * inode,int mask, struct nameidata *nd)
+int vfs_permission(struct inode * inode, int mask, struct exec_perm * exec_perm)
+{
+	int ret;
+
+	if (exec_perm != NULL)
+		down(&inode->i_sem);
+
+	ret = __vfs_permission(inode, mask);
+
+	if (exec_perm != NULL) {
+		if (!ret) {
+			exec_perm->set = 1;
+			exec_perm->mode = inode->i_mode;
+			exec_perm->uid = inode->i_uid;
+			exec_perm->gid = inode->i_gid;
+		}
+		up(&inode->i_sem);
+	}
+	return ret;
+}
+
+int permission(struct inode * inode, int mask, struct nameidata *nd,
+		struct exec_perm *exec_perm)
 {
 	int retval;
 	int submask;
@@ -227,9 +251,9 @@ int permission(struct inode * inode,int 
 	submask = mask & ~MAY_APPEND;
 
 	if (inode->i_op && inode->i_op->permission)
-		retval = inode->i_op->permission(inode, submask, nd);
+		retval = inode->i_op->permission(inode, submask, nd, exec_perm);
 	else
-		retval = vfs_permission(inode, submask);
+		retval = vfs_permission(inode, submask, exec_perm);
 	if (retval)
 		return retval;
 
@@ -335,6 +359,21 @@ static struct dentry * cached_lookup(str
 	if (!dentry)
 		dentry = d_lookup(parent, name);
 
+	/*
+	 * The revalidation rules are simple:
+	 * d_revalidate operation is called when we're about to use a cached
+	 * dentry rather than call d_lookup.
+	 * d_revalidate method may unhash the dentry itself or return FALSE, in
+	 * which case if the dentry can be released d_lookup will be called.
+	 *
+	 * Additionally, by request of NFS people
+	 * (http://linux.bkbits.net:8080/linux-2.4/cset@1.181?nav=index.html|src/|src/fs|related/fs/namei.c)
+	 * d_revalidate is called when `/', `.' or `..' are looked up.
+	 * Since re-lookup is impossible on them, we introduce a hack and
+	 * return an error in this case.
+	 *
+	 *     2003/02/19  SAW
+	 */
 	if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
 		dentry = do_revalidate(dentry, nd);
 
@@ -436,6 +475,7 @@ static struct dentry * real_lookup(struc
 	struct dentry * result;
 	struct inode *dir = parent->d_inode;
 
+repeat:
 	down(&dir->i_sem);
 	/*
 	 * First re-do the cached lookup just in case it was created
@@ -482,7 +522,7 @@ out_unlock:
 	if (result->d_op && result->d_op->d_revalidate) {
 		result = do_revalidate(result, nd);
 		if (!result)
-			result = ERR_PTR(-ENOENT);
+			goto repeat;
 	}
 	return result;
 }
@@ -664,7 +704,14 @@ static inline void follow_dotdot(struct 
                         read_unlock(&current->fs->lock);
 			break;
 		}
-                read_unlock(&current->fs->lock);
+#ifdef CONFIG_VE
+		if (*dentry == get_exec_env()->fs_root &&
+		    *mnt == get_exec_env()->fs_rootmnt)  {
+			read_unlock(&current->fs->lock);
+			break;
+		}
+#endif
+		read_unlock(&current->fs->lock);
 		spin_lock(&dcache_lock);
 		if (*dentry != (*mnt)->mnt_root) {
 			*dentry = dget((*dentry)->d_parent);
@@ -750,6 +797,7 @@ static fastcall int __link_path_walk(con
 {
 	struct path next;
 	struct inode *inode;
+	int real_components = 0;
 	int err, atomic;
 	unsigned int lookup_flags = nd->flags;
 
@@ -772,7 +820,7 @@ static fastcall int __link_path_walk(con
 
 		err = exec_permission_lite(inode, nd);
 		if (err == -EAGAIN) { 
-			err = permission(inode, MAY_EXEC, nd);
+			err = permission(inode, MAY_EXEC, nd, NULL);
 		}
  		if (err)
 			break;
@@ -824,10 +872,14 @@ static fastcall int __link_path_walk(con
 		}
 		nd->flags |= LOOKUP_CONTINUE;
 		/* This does the actual lookups.. */
+		real_components++;
 		err = do_lookup(nd, &this, &next, atomic);
 		if (err)
 			break;
 		/* Check mountpoints.. */
+		err = -ENOENT;
+		if ((lookup_flags & LOOKUP_STRICT) && d_mountpoint(nd->dentry))
+			goto out_dput;
 		follow_mount(&next.mnt, &next.dentry);
 
 		err = -ENOENT;
@@ -839,6 +891,10 @@ static fastcall int __link_path_walk(con
 			goto out_dput;
 
 		if (inode->i_op->follow_link) {
+			err = -ENOENT;
+			if (lookup_flags & LOOKUP_STRICT)
+				goto out_dput;
+
 			mntget(next.mnt);
 			if (next.mnt != nd->mnt) {
 				dput(nd->dentry);
@@ -895,9 +951,13 @@ last_component:
 		err = do_lookup(nd, &this, &next, atomic);
 		if (err)
 			break;
+		err = -ENOENT;
+		if ((lookup_flags & LOOKUP_STRICT) && d_mountpoint(nd->dentry))
+			goto out_dput;
 		follow_mount(&next.mnt, &next.dentry);
 		inode = next.dentry->d_inode;
 		if ((lookup_flags & LOOKUP_FOLLOW)
+		    && !(lookup_flags & LOOKUP_STRICT)
 		    && inode && inode->i_op && inode->i_op->follow_link) {
 			mntget(next.mnt);
 			if (next.mnt != nd->mnt) {
@@ -930,26 +990,40 @@ lookup_parent:
 		nd->last_type = LAST_NORM;
 		if (this.name[0] != '.')
 			goto return_base;
-		if (this.len == 1)
+		if (this.len == 1) {
 			nd->last_type = LAST_DOT;
-		else if (this.len == 2 && this.name[1] == '.')
+			goto return_reval;
+		} else if (this.len == 2 && this.name[1] == '.') {
 			nd->last_type = LAST_DOTDOT;
-		else
-			goto return_base;
+			goto return_reval;
+		}
+return_base:
+		if (!(nd->flags & LOOKUP_NOAREACHECK)) {
+			err = check_area_access_ve(nd->dentry, nd->mnt);
+			if (err)
+				break;
+		}
+		return 0;
 return_reval:
 		/*
 		 * We bypassed the ordinary revalidation routines.
 		 * We may need to check the cached dentry for staleness.
 		 */
-		if (nd->dentry && nd->dentry->d_sb &&
+		if (!real_components && nd->dentry && nd->dentry->d_sb &&
 		    (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
 			err = -ESTALE;
 			/* Note: we do not d_invalidate() */
 			if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
+				/*
+				 * This lookup is for `/' or `.' or `..'.
+				 * The filesystem unhashed the dentry itself
+				 * inside d_revalidate (otherwise, d_invalidate
+				 * wouldn't succeed).  As a special courtesy to
+				 * NFS we return an error.   2003/02/19  SAW
+				 */
 				break;
 		}
-return_base:
-		return 0;
+		goto return_base;
 out_dput:
 		dput(next.dentry);
 		break;
@@ -1112,7 +1186,7 @@ static struct dentry * __lookup_hash(str
 	int err;
 
 	inode = base->d_inode;
-	err = permission(inode, MAY_EXEC, nd);
+	err = permission(inode, MAY_EXEC, nd, NULL);
 	dentry = ERR_PTR(err);
 	if (err)
 		goto out;
@@ -1250,7 +1324,7 @@ static inline int may_delete(struct inod
 
 	audit_notify_watch(victim->d_inode, MAY_WRITE);
 
-	error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
+	error = permission(dir,MAY_WRITE | MAY_EXEC, NULL, NULL);
 	if (error)
 		return error;
 	if (IS_APPEND(dir))
@@ -1287,7 +1361,7 @@ static inline int may_create(struct inod
 		return -EEXIST;
 	if (IS_DEADDIR(dir))
 		return -ENOENT;
-	return permission(dir,MAY_WRITE | MAY_EXEC, nd);
+	return permission(dir, MAY_WRITE | MAY_EXEC, nd, NULL);
 }
 
 /* 
@@ -1396,7 +1470,7 @@ int may_open(struct nameidata *nd, int a
 	if (S_ISDIR(inode->i_mode) && (acc_mode & MAY_WRITE))
 		return -EISDIR;
 
-	error = permission(inode, acc_mode, nd);
+	error = permission(inode, acc_mode, nd, NULL);
 	if (error)
 		return error;
 
@@ -1768,6 +1842,7 @@ out:
 
 	return error;
 }
+EXPORT_SYMBOL(sys_mknod);
 
 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
@@ -1824,6 +1899,7 @@ out:
 
 	return error;
 }
+EXPORT_SYMBOL(sys_mkdir);
 
 /*
  * We try to drop the dentry early: we should have
@@ -1843,17 +1919,13 @@ out:
 void dentry_unhash(struct dentry *dentry)
 {
 	dget(dentry);
-	spin_lock(&dcache_lock);
-	switch (atomic_read(&dentry->d_count)) {
-	default:
-		spin_unlock(&dcache_lock);
+	if (atomic_read(&dentry->d_count))
 		shrink_dcache_parent(dentry);
-		spin_lock(&dcache_lock);
-		if (atomic_read(&dentry->d_count) != 2)
-			break;
-	case 2:
+	spin_lock(&dcache_lock);
+	spin_lock(&dentry->d_lock);
+	if (atomic_read(&dentry->d_count) == 2)
 		__d_drop(dentry);
-	}
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 }
 
@@ -1931,6 +2003,7 @@ exit:
 	putname(name);
 	return error;
 }
+EXPORT_SYMBOL(sys_rmdir);
 
 int vfs_unlink(struct inode *dir, struct dentry *dentry)
 {
@@ -2014,6 +2087,7 @@ slashes:
 		S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
 	goto exit2;
 }
+EXPORT_SYMBOL(sys_unlink);
 
 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
 {
@@ -2071,6 +2145,7 @@ out:
 	putname(from);
 	return error;
 }
+EXPORT_SYMBOL(sys_symlink);
 
 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
 {
@@ -2202,7 +2277,7 @@ int vfs_rename_dir(struct inode *old_dir
 	 * we'll need to flip '..'.
 	 */
 	if (new_dir != old_dir) {
-		error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
+		error = permission(old_dentry->d_inode, MAY_WRITE, NULL, NULL);
 		if (error)
 			return error;
 	}
@@ -2274,6 +2349,9 @@ int vfs_rename(struct inode *old_dir, st
 	int error;
 	int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
 
+	if (DQUOT_RENAME(old_dentry->d_inode, old_dir, new_dir))
+		return -EXDEV;
+
 	if (old_dentry->d_inode == new_dentry->d_inode)
  		return 0;
  
@@ -2403,6 +2481,7 @@ asmlinkage long sys_rename(const char __
 	putname(from);
 	return error;
 }
+EXPORT_SYMBOL(sys_rename);
 
 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
 {
@@ -2517,7 +2596,9 @@ int __page_symlink(struct inode *inode, 
 	kaddr = kmap_atomic(page, KM_USER0);
 	memcpy(kaddr, symname, len-1);
 	kunmap_atomic(kaddr, KM_USER0);
-	mapping->a_ops->commit_write(NULL, page, 0, len-1);
+	err = mapping->a_ops->commit_write(NULL, page, 0, len-1);
+	if (err)
+		goto fail_map;
 	/*
 	 * Notice that we are _not_ going to block here - end of page is
 	 * unmapped, so this will only try to map the rest of page, see
diff -Nurap linux-2.6.9-100.orig/fs/namespace.c linux-2.6.9-ve023stab054/fs/namespace.c
--- linux-2.6.9-100.orig/fs/namespace.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/namespace.c	2011-06-15 19:26:22.000000000 +0400
@@ -37,6 +37,7 @@ static inline int sysfs_init(void)
 
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
 spinlock_t vfsmount_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+EXPORT_SYMBOL(vfsmount_lock);
 
 static struct list_head *mount_hashtable;
 static int hash_mask, hash_bits;
@@ -238,10 +239,32 @@ static int show_vfsmnt(struct seq_file *
 		{ 0, NULL }
 	};
 	struct proc_fs_info *fs_infop;
+	char *path_buf, *path;
 
-	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+	/* skip FS_NOMOUNT mounts (rootfs) */
+	if (mnt->mnt_sb->s_flags & MS_NOUSER)
+		return 0;
+
+	path_buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!path_buf)
+		return -ENOMEM;
+	path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
+	if (IS_ERR(path)) {
+		free_page((unsigned long) path_buf);
+		/*
+		 * This means that the file position will be incremented, i.e.
+		 * the total number of "invisible" vfsmnt will leak.
+		 */
+		return 0;
+	}
+
+	if (ve_is_super(get_exec_env()))
+		mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+	else
+		mangle(m, mnt->mnt_sb->s_type->name);
 	seq_putc(m, ' ');
-	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+	mangle(m, path);
+	free_page((unsigned long) path_buf);
 	seq_putc(m, ' ');
 	mangle(m, mnt->mnt_sb->s_type->name);
 	seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
@@ -402,6 +425,7 @@ void umount_tree(struct vfsmount *mnt)
 		spin_lock(&vfsmount_lock);
 	}
 }
+EXPORT_SYMBOL(umount_tree);
 
 static int do_umount(struct vfsmount *mnt, int flags)
 {
@@ -518,7 +542,7 @@ asmlinkage long sys_umount(char __user *
 		goto dput_and_out;
 
 	retval = -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		goto dput_and_out;
 
 	retval = do_umount(nd.mnt, flags);
@@ -543,7 +567,7 @@ asmlinkage long sys_oldumount(char __use
 
 static int mount_is_safe(struct nameidata *nd)
 {
-	if (capable(CAP_SYS_ADMIN))
+	if (capable(CAP_VE_SYS_ADMIN))
 		return 0;
 	return -EPERM;
 #ifdef notyet
@@ -553,7 +577,7 @@ static int mount_is_safe(struct nameidat
 		if (current->uid != nd->dentry->d_inode->i_uid)
 			return -EPERM;
 	}
-	if (permission(nd->dentry->d_inode, MAY_WRITE, nd))
+	if (permission(nd->dentry->d_inode, MAY_WRITE, nd, NULL))
 		return -EPERM;
 	return 0;
 #endif
@@ -656,7 +680,8 @@ out_unlock:
 /*
  * do loopback mount.
  */
-static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
+static int do_loopback(struct nameidata *nd, char *old_name, int recurse,
+		int mnt_flags)
 {
 	struct nameidata old_nd;
 	struct vfsmount *mnt = NULL;
@@ -680,6 +705,7 @@ static int do_loopback(struct nameidata 
 	}
 
 	if (mnt) {
+		mnt->mnt_flags |= mnt_flags;
 		/* stop bind mounts from expiring */
 		spin_lock(&vfsmount_lock);
 		list_del_init(&mnt->mnt_fslink);
@@ -710,8 +736,9 @@ static int do_remount(struct nameidata *
 {
 	int err;
 	struct super_block * sb = nd->mnt->mnt_sb;
+	int bind = 0;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 
 	if (!check_mnt(nd->mnt))
@@ -720,12 +747,19 @@ static int do_remount(struct nameidata *
 	if (nd->dentry != nd->mnt->mnt_root)
 		return -EINVAL;
 
+	/* do not allow to remount bind-mounts with another mountpoint flags */
+	if (nd->dentry != sb->s_root) {
+		if ((flags & ~(MS_BIND|MS_POSIXACL|MS_NOUSER)) != 0)
+			return -EINVAL;
+		bind = 1;
+	}
+
 	down_write(&sb->s_umount);
-	err = do_remount_sb(sb, flags, data, 0);
+	err = bind ? 0 : do_remount_sb(sb, flags, data, 0);
 	if (!err)
 		nd->mnt->mnt_flags=mnt_flags;
 	up_write(&sb->s_umount);
-	if (!err)
+	if (!err && !bind)
 		security_sb_post_remount(nd->mnt, flags, data);
 	return err;
 }
@@ -735,7 +769,7 @@ static int do_move_mount(struct nameidat
 	struct nameidata old_nd, parent_nd;
 	struct vfsmount *p;
 	int err = 0;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	if (!old_name || !*old_name)
 		return -EINVAL;
@@ -802,15 +836,20 @@ static int do_new_mount(struct nameidata
 			int mnt_flags, char *name, void *data)
 {
 	struct vfsmount *mnt;
+	struct file_system_type *fstype;
 
 	if (!type || !memchr(type, 0, PAGE_SIZE))
 		return -EINVAL;
 
 	/* we need capabilities... */
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 
-	mnt = do_kern_mount(type, flags, name, data);
+	fstype = get_fs_type(type);
+	if (fstype == NULL)
+		return -ENODEV;
+	mnt = do_kern_mount(fstype, flags, name, data);
+	put_filesystem(fstype);
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
@@ -845,6 +884,11 @@ int do_add_mount(struct vfsmount *newmnt
 		goto unlock;
 
 	newmnt->mnt_flags = mnt_flags;
+
+	/* make this before graft_tree reveals mnt_root to the world... */
+	if (nd->dentry->d_flags & DCACHE_VIRTUAL)
+		newmnt->mnt_root->d_flags |= DCACHE_VIRTUAL;
+
 	err = graft_tree(newmnt, nd);
 
 	if (err == 0 && fslist) {
@@ -1013,8 +1057,11 @@ int copy_mount_options(const void __user
 	 * gave us is valid.  Just in case, we'll zero
 	 * the remainder of the page.
 	 */
-	/* copy_from_user cannot cross TASK_SIZE ! */
-	size = TASK_SIZE - (unsigned long)data;
+	if (segment_eq(get_fs(), KERNEL_DS))
+		size = PAGE_SIZE;
+	else 
+		/* copy_from_user cannot cross TASK_SIZE ! */
+		size = TASK_SIZE - (unsigned long)data;
 	if (size > PAGE_SIZE)
 		size = PAGE_SIZE;
 
@@ -1086,7 +1133,7 @@ long do_mount(char * dev_name, char * di
 		retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
 				    data_page);
 	else if (flags & MS_BIND)
-		retval = do_loopback(&nd, dev_name, flags & MS_REC);
+		retval = do_loopback(&nd, dev_name, flags & MS_REC, mnt_flags);
 	else if (flags & MS_MOVE)
 		retval = do_move_mount(&nd, dev_name);
 	else
@@ -1224,6 +1271,7 @@ out1:
 	free_page(type_page);
 	return retval;
 }
+EXPORT_SYMBOL(sys_mount);
 
 /*
  * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
@@ -1276,7 +1324,7 @@ static void chroot_fs_refs(struct nameid
 	struct fs_struct *fs;
 
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_ve(g, p) {
 		task_lock(p);
 		fs = p->fs;
 		if (fs) {
@@ -1289,7 +1337,7 @@ static void chroot_fs_refs(struct nameid
 			put_fs_struct(fs);
 		} else
 			task_unlock(p);
-	} while_each_thread(g, p);
+	} while_each_thread_ve(g, p);
 	read_unlock(&tasklist_lock);
 }
 
@@ -1402,8 +1450,13 @@ static void __init init_mount_tree(void)
 	struct vfsmount *mnt;
 	struct namespace *namespace;
 	struct task_struct *g, *p;
+	struct file_system_type *fstype;
 
-	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
+	fstype = get_fs_type("rootfs");
+	if (fstype == NULL)
+		panic("Can't create rootfs");
+	mnt = do_kern_mount(fstype, 0, "rootfs", NULL);
+	put_filesystem(fstype);
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
 	namespace = kmalloc(sizeof(*namespace), GFP_KERNEL);
@@ -1418,10 +1471,10 @@ static void __init init_mount_tree(void)
 
 	init_task.namespace = namespace;
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		get_namespace(namespace);
 		p->namespace = namespace;
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 	read_unlock(&tasklist_lock);
 
 	set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
@@ -1436,7 +1489,7 @@ void __init mnt_init(unsigned long mempa
 	int i;
 
 	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
-			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 
 	order = 0; 
 	mount_hashtable = (struct list_head *)
@@ -1496,3 +1549,4 @@ void __put_namespace(struct namespace *n
 	up_write(&namespace->sem);
 	kfree(namespace);
 }
+EXPORT_SYMBOL(__put_namespace);
diff -Nurap linux-2.6.9-100.orig/fs/ncpfs/ioctl.c linux-2.6.9-ve023stab054/fs/ncpfs/ioctl.c
--- linux-2.6.9-100.orig/fs/ncpfs/ioctl.c	2004-10-19 01:54:31.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ncpfs/ioctl.c	2011-06-15 19:26:19.000000000 +0400
@@ -34,7 +34,7 @@ ncp_get_fs_info(struct ncp_server* serve
 {
 	struct ncp_fs_info info;
 
-	if ((permission(inode, MAY_WRITE, NULL) != 0)
+	if ((permission(inode, MAY_WRITE, NULL, NULL) != 0)
 	    && (current->uid != server->m.mounted_uid)) {
 		return -EACCES;
 	}
@@ -62,7 +62,7 @@ ncp_get_fs_info_v2(struct ncp_server* se
 {
 	struct ncp_fs_info_v2 info2;
 
-	if ((permission(inode, MAY_WRITE, NULL) != 0)
+	if ((permission(inode, MAY_WRITE, NULL, NULL) != 0)
 	    && (current->uid != server->m.mounted_uid)) {
 		return -EACCES;
 	}
@@ -190,7 +190,7 @@ int ncp_ioctl(struct inode *inode, struc
 	switch (cmd) {
 	case NCP_IOC_NCPREQUEST:
 
-		if ((permission(inode, MAY_WRITE, NULL) != 0)
+		if ((permission(inode, MAY_WRITE, NULL, NULL) != 0)
 		    && (current->uid != server->m.mounted_uid)) {
 			return -EACCES;
 		}
@@ -254,7 +254,7 @@ int ncp_ioctl(struct inode *inode, struc
 		{
 			unsigned long tmp = server->m.mounted_uid;
 
-			if (   (permission(inode, MAY_READ, NULL) != 0)
+			if (   (permission(inode, MAY_READ, NULL, NULL) != 0)
 			    && (current->uid != server->m.mounted_uid))
 			{
 				return -EACCES;
@@ -268,7 +268,7 @@ int ncp_ioctl(struct inode *inode, struc
 		{
 			struct ncp_setroot_ioctl sr;
 
-			if (   (permission(inode, MAY_READ, NULL) != 0)
+			if (   (permission(inode, MAY_READ, NULL, NULL) != 0)
 			    && (current->uid != server->m.mounted_uid))
 			{
 				return -EACCES;
@@ -343,7 +343,7 @@ int ncp_ioctl(struct inode *inode, struc
 
 #ifdef CONFIG_NCPFS_PACKET_SIGNING	
 	case NCP_IOC_SIGN_INIT:
-		if ((permission(inode, MAY_WRITE, NULL) != 0)
+		if ((permission(inode, MAY_WRITE, NULL, NULL) != 0)
 		    && (current->uid != server->m.mounted_uid))
 		{
 			return -EACCES;
@@ -366,7 +366,7 @@ int ncp_ioctl(struct inode *inode, struc
 		return 0;		
 		
         case NCP_IOC_SIGN_WANTED:
-		if (   (permission(inode, MAY_READ, NULL) != 0)
+		if (   (permission(inode, MAY_READ, NULL, NULL) != 0)
 		    && (current->uid != server->m.mounted_uid))
 		{
 			return -EACCES;
@@ -379,7 +379,7 @@ int ncp_ioctl(struct inode *inode, struc
 		{
 			int newstate;
 
-			if (   (permission(inode, MAY_WRITE, NULL) != 0)
+			if (   (permission(inode, MAY_WRITE, NULL, NULL) != 0)
 			    && (current->uid != server->m.mounted_uid))
 			{
 				return -EACCES;
@@ -400,7 +400,7 @@ int ncp_ioctl(struct inode *inode, struc
 
 #ifdef CONFIG_NCPFS_IOCTL_LOCKING
 	case NCP_IOC_LOCKUNLOCK:
-		if (   (permission(inode, MAY_WRITE, NULL) != 0)
+		if (   (permission(inode, MAY_WRITE, NULL, NULL) != 0)
 		    && (current->uid != server->m.mounted_uid))
 		{
 			return -EACCES;
@@ -605,7 +605,7 @@ outrel:			
 #endif /* CONFIG_NCPFS_NLS */
 
 	case NCP_IOC_SETDENTRYTTL:
-		if ((permission(inode, MAY_WRITE, NULL) != 0) &&
+		if ((permission(inode, MAY_WRITE, NULL, NULL) != 0) &&
 				 (current->uid != server->m.mounted_uid))
 			return -EACCES;
 		{
@@ -635,7 +635,7 @@ outrel:			
            so we have this out of switch */
 	if (cmd == NCP_IOC_GETMOUNTUID) {
 		__kernel_uid_t uid = 0;
-		if ((permission(inode, MAY_READ, NULL) != 0)
+		if ((permission(inode, MAY_READ, NULL, NULL) != 0)
 		    && (current->uid != server->m.mounted_uid)) {
 			return -EACCES;
 		}
diff -Nurap linux-2.6.9-100.orig/fs/nfs/dir.c linux-2.6.9-ve023stab054/fs/nfs/dir.c
--- linux-2.6.9-100.orig/fs/nfs/dir.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/nfs/dir.c	2011-06-15 19:26:19.000000000 +0400
@@ -2060,7 +2060,8 @@ out:
 	return -EACCES;
 }
 
-int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int nfs_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *exec_perm)
 {
 	struct rpc_cred *cred;
 	int mode = inode->i_mode;
@@ -2119,6 +2120,7 @@ force_lookup:
 	if (!NFS_PROTO(inode)->access)
 		goto out_notsup;
 
+	/* Can NFS fill exec_perm atomically?  Don't know...  --SAW */
 	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
 	res = nfs_do_access(inode, cred, mask);
 	put_rpccred(cred);
@@ -2130,7 +2132,7 @@ out:
 out_notsup:
 	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	if (res == 0)
-		res = vfs_permission(inode, mask);
+		res = vfs_permission(inode, mask, exec_perm);
 	unlock_kernel();
 	return res;
 }
diff -Nurap linux-2.6.9-100.orig/fs/nfs/direct.c linux-2.6.9-ve023stab054/fs/nfs/direct.c
--- linux-2.6.9-100.orig/fs/nfs/direct.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/nfs/direct.c	2011-06-15 19:26:19.000000000 +0400
@@ -58,6 +58,7 @@
 #include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
+#define MAX_DIRECTIO_SIZE	(4096UL << PAGE_SHIFT)
 
 static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty);
 static kmem_cache_t *nfs_direct_cachep;
@@ -113,6 +114,11 @@ static inline int nfs_get_user_pages(int
 	unsigned long page_count;
 	size_t array_size;
 
+	if (size > MAX_DIRECTIO_SIZE) {
+		*pages = NULL;
+  		return -EFBIG;
+	}
+
 	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	page_count -= user_addr >> PAGE_SHIFT;
 
diff -Nurap linux-2.6.9-100.orig/fs/nfs/inode.c linux-2.6.9-ve023stab054/fs/nfs/inode.c
--- linux-2.6.9-100.orig/fs/nfs/inode.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/nfs/inode.c	2011-06-15 19:26:22.000000000 +0400
@@ -1793,6 +1793,79 @@ static struct super_operations nfs4_sops
 	.show_options	= nfs_show_options,
 };
 
+#ifdef CONFIG_SYSCTL
+#define CTL_FS_NFS 1
+#define CTL_FS_NFS_VE_ALLOW_NFS 2
+
+static int ve_allow_nfs;
+static struct ctl_table_header *nfs_table_header;
+
+static void inline change_ve_allow_nfs(void)
+{
+	if (ve_allow_nfs) {
+		nfs_fs_type.fs_flags |= FS_VIRTUALIZED;
+		printk("NFS was allowed in VEs\n");
+	} else {
+		nfs_fs_type.fs_flags &= ~FS_VIRTUALIZED;
+		printk("NFS was prohibited in VEs\n");
+	}
+}
+
+static int sysctl_ve_allow_nfs(ctl_table *ctl, int write, struct file * filp,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int val;
+	int ret;
+
+	val = ve_allow_nfs;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	if (write && ve_allow_nfs != val && (!val || !ve_allow_nfs))
+		change_ve_allow_nfs();
+
+	return ret;
+}
+
+static ctl_table nfs_table[] = {
+	{
+		.ctl_name	= CTL_FS_NFS_VE_ALLOW_NFS,
+		.procname	= "ve_allow_nfs",
+		.data		= &ve_allow_nfs,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &sysctl_ve_allow_nfs,
+	},
+	{ 0 }
+};
+
+static ctl_table fs_table[] = {
+	{CTL_FS_NFS, "nfs", NULL, 0, 0555, nfs_table},
+	{ 0 }
+};
+
+static ctl_table sysctls_root[] = {
+	{ CTL_FS, "fs", NULL, 0, 0555, fs_table },
+	{ 0 }
+};
+
+static int nfs_sysctl_init(void)
+{
+	nfs_table_header = register_sysctl_table(sysctls_root, 0);
+	if (!nfs_table_header)
+		return -ENOMEM;
+	return 0;
+}
+
+static void nfs_sysctl_fini(void)
+{
+	unregister_sysctl_table(nfs_table_header);
+}
+#else
+static int nfs_sysctl_init(void) { return 0; }
+static void nfs_sysctl_fini(void) { ; }
+#endif
+
 /*
  * Clean out any remaining NFSv4 state that might be left over due
  * to open() calls that passed nfs_atomic_lookup, but failed to call
@@ -2254,9 +2327,18 @@ static int __init init_nfs_fs(void)
 	if (err)
 		goto out;
 	if ((err = register_nfs4fs()) != 0)
-		goto out;
+		goto out_regnfs4;
+
+	err = nfs_sysctl_init();
+	if (err)
+		goto out_sysctl;
 	acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker);
 	return 0;
+
+out_sysctl:
+	unregister_nfs4fs();
+out_regnfs4:
+	unregister_filesystem(&nfs_fs_type);
 out:
 #ifdef CONFIG_PROC_FS
 	rpc_proc_unregister("nfs");
@@ -2281,6 +2363,7 @@ static void __exit exit_nfs_fs(void)
 	if (acl_shrinker != NULL)
 		remove_shrinker(acl_shrinker);
 
+	nfs_sysctl_fini();
 #ifdef CONFIG_NFS_DIRECTIO
 	nfs_destroy_directcache();
 #endif
diff -Nurap linux-2.6.9-100.orig/fs/nfs/nfsroot.c linux-2.6.9-ve023stab054/fs/nfs/nfsroot.c
--- linux-2.6.9-100.orig/fs/nfs/nfsroot.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/nfs/nfsroot.c	2011-06-15 19:26:19.000000000 +0400
@@ -314,7 +314,7 @@ static int __init root_nfs_name(char *na
 	/* Override them by options set on kernel command-line */
 	root_nfs_parse(name, buf);
 
-	cp = system_utsname.nodename;
+	cp = ve_utsname.nodename;
 	if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
 		printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
 		return -1;
diff -Nurap linux-2.6.9-100.orig/fs/nfsctl.c linux-2.6.9-ve023stab054/fs/nfsctl.c
--- linux-2.6.9-100.orig/fs/nfsctl.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/nfsctl.c	2011-06-15 19:26:19.000000000 +0400
@@ -23,8 +23,14 @@ static struct file *do_open(char *name, 
 {
 	struct nameidata nd;
 	int error;
+	struct file_system_type *fstype;
 
-	nd.mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
+	fstype = get_fs_type("nfsd");
+	if (fstype == NULL)
+		return ERR_PTR(-ENODEV);
+
+	nd.mnt = do_kern_mount(fstype, 0, "nfsd", NULL);
+	put_filesystem(fstype);
 
 	if (IS_ERR(nd.mnt))
 		return (struct file *)nd.mnt;
diff -Nurap linux-2.6.9-100.orig/fs/nfsd/nfsfh.c linux-2.6.9-ve023stab054/fs/nfsd/nfsfh.c
--- linux-2.6.9-100.orig/fs/nfsd/nfsfh.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/nfsd/nfsfh.c	2011-06-15 19:26:19.000000000 +0400
@@ -56,7 +56,7 @@ int nfsd_acceptable(void *expv, struct d
 		/* make sure parents give x permission to user */
 		int err;
 		parent = dget_parent(tdentry);
-		err = permission(parent->d_inode, MAY_EXEC, NULL);
+		err = permission(parent->d_inode, MAY_EXEC, NULL, NULL);
 		if (err < 0) {
 			dput(parent);
 			break;
diff -Nurap linux-2.6.9-100.orig/fs/nfsd/vfs.c linux-2.6.9-ve023stab054/fs/nfsd/vfs.c
--- linux-2.6.9-100.orig/fs/nfsd/vfs.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/nfsd/vfs.c	2011-06-15 19:26:19.000000000 +0400
@@ -1794,12 +1794,13 @@ nfsd_permission(struct svc_export *exp, 
 	    inode->i_uid == current->fsuid)
 		return 0;
 
-	err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
+	err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC),
+			NULL, NULL);
 
 	/* Allow read access to binaries even when mode 111 */
 	if (err == -EACCES && S_ISREG(inode->i_mode) &&
 	    acc == (MAY_READ | MAY_OWNER_OVERRIDE))
-		err = permission(inode, MAY_EXEC, NULL);
+		err = permission(inode, MAY_EXEC, NULL, NULL);
 
 	return err? nfserrno(err) : 0;
 }
diff -Nurap linux-2.6.9-100.orig/fs/ntfs/super.c linux-2.6.9-ve023stab054/fs/ntfs/super.c
--- linux-2.6.9-100.orig/fs/ntfs/super.c	2004-10-19 01:54:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/ntfs/super.c	2011-06-15 19:26:19.000000000 +0400
@@ -2528,7 +2528,7 @@ iput_tmp_ino_err_out_now:
 	 * method again... FIXME: Do we need to do this twice now because of
 	 * attribute inodes? I think not, so leave as is for now... (AIA)
 	 */
-	if (invalidate_inodes(sb)) {
+	if (invalidate_inodes(sb, 0)) {
 		ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
 				"driver bug.");
 		/* Copied from fs/super.c. I just love this message. (-; */
diff -Nurap linux-2.6.9-100.orig/fs/open.c linux-2.6.9-ve023stab054/fs/open.c
--- linux-2.6.9-100.orig/fs/open.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/open.c	2011-06-15 19:26:21.000000000 +0400
@@ -23,6 +23,7 @@
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/audit.h>
+#include <linux/faudit.h>
 
 #include <asm/unistd.h>
 
@@ -47,7 +48,21 @@ int vfs_statfs(struct super_block *sb, s
 
 EXPORT_SYMBOL(vfs_statfs);
 
-static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
+int faudit_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	struct faudit_statfs_arg arg;
+
+	arg.sb = sb;
+	arg.stat = buf;
+
+	if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STATFS, &arg)
+			!= NOTIFY_DONE)
+		return arg.err;
+	return 0;
+}
+
+static int vfs_statfs_native(struct super_block *sb, struct vfsmount *mnt,
+		struct statfs *buf)
 {
 	struct kstatfs st;
 	int retval;
@@ -56,6 +71,10 @@ static int vfs_statfs_native(struct supe
 	if (retval)
 		return retval;
 
+	retval = faudit_statfs(mnt->mnt_sb, &st);
+	if (retval)
+		return retval;
+
 	if (sizeof(*buf) == sizeof(st))
 		memcpy(buf, &st, sizeof(st));
 	else {
@@ -90,7 +109,8 @@ static int vfs_statfs_native(struct supe
 	return 0;
 }
 
-static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf)
+static int vfs_statfs64(struct super_block *sb, struct vfsmount *mnt,
+		struct statfs64 *buf)
 {
 	struct kstatfs st;
 	int retval;
@@ -99,6 +119,10 @@ static int vfs_statfs64(struct super_blo
 	if (retval)
 		return retval;
 
+	retval = faudit_statfs(mnt->mnt_sb, &st);
+	if (retval)
+		return retval;
+
 	if (sizeof(*buf) == sizeof(st))
 		memcpy(buf, &st, sizeof(st));
 	else {
@@ -125,7 +149,8 @@ asmlinkage long sys_statfs(const char __
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs tmp;
-		error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs_native(nd.dentry->d_inode->i_sb,
+				nd.mnt, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -144,7 +169,8 @@ asmlinkage long sys_statfs64(const char 
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs64 tmp;
-		error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs64(nd.dentry->d_inode->i_sb,
+				nd.mnt, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -163,7 +189,8 @@ asmlinkage long sys_fstatfs(unsigned int
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb,
+			file->f_vfsmnt, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -184,7 +211,8 @@ asmlinkage long sys_fstatfs64(unsigned i
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs64(file->f_dentry->d_inode->i_sb,
+			file->f_vfsmnt, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -239,7 +267,7 @@ static inline long do_sys_truncate(const
 	if (!S_ISREG(inode->i_mode))
 		goto dput_and_out;
 
-	error = permission(inode,MAY_WRITE,&nd);
+	error = permission(inode,MAY_WRITE,&nd,NULL);
 	if (error)
 		goto dput_and_out;
 
@@ -393,7 +421,7 @@ asmlinkage long sys_utime(char __user * 
                         goto dput_and_out;
 
 		if (current->fsuid != inode->i_uid &&
-		    (error = permission(inode,MAY_WRITE,&nd)) != 0)
+		    (error = permission(inode,MAY_WRITE,&nd,NULL)) != 0)
 			goto dput_and_out;
 	}
 	down(&inode->i_sem);
@@ -446,7 +474,7 @@ long do_utimes(char __user * filename, s
                         goto dput_and_out;
 
 		if (current->fsuid != inode->i_uid &&
-		    (error = permission(inode,MAY_WRITE,&nd)) != 0)
+		    (error = permission(inode,MAY_WRITE,&nd,NULL)) != 0)
 			goto dput_and_out;
 	}
 	down(&inode->i_sem);
@@ -505,7 +533,7 @@ asmlinkage long sys_access(const char __
 
 	res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
 	if (!res) {
-		res = permission(nd.dentry->d_inode, mode, &nd);
+		res = permission(nd.dentry->d_inode, mode, &nd, NULL);
 		/* SuS v2 requires we report a read only fs too */
 		if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
 		   && !special_file(nd.dentry->d_inode->i_mode))
@@ -529,7 +557,7 @@ asmlinkage long sys_chdir(const char __u
 	if (error)
 		goto out;
 
-	error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
+	error = permission(nd.dentry->d_inode,MAY_EXEC,&nd,NULL);
 	if (error)
 		goto dput_and_out;
 
@@ -564,7 +592,7 @@ asmlinkage long sys_fchdir(unsigned int 
 	if (!S_ISDIR(inode->i_mode))
 		goto out_putf;
 
-	error = permission(inode, MAY_EXEC, NULL);
+	error = permission(inode, MAY_EXEC, NULL, NULL);
 	if (!error)
 		set_fs_pwd(current->fs, mnt, dentry);
 out_putf:
@@ -582,7 +610,7 @@ asmlinkage long sys_chroot(const char __
 	if (error)
 		goto out;
 
-	error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
+	error = permission(nd.dentry->d_inode,MAY_EXEC,&nd,NULL);
 	if (error)
 		goto dput_and_out;
 
@@ -717,6 +745,7 @@ asmlinkage long sys_chown(const char __u
 	}
 	return error;
 }
+EXPORT_SYMBOL(sys_chown);
 
 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
 {
@@ -809,6 +838,9 @@ static struct file *__dentry_open(struct
 	struct inode *inode;
 	int error;
 
+	if (!capable(CAP_SYS_RAWIO))
+		flags &= ~O_DIRECT;
+
 	f->f_flags = flags;
 	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
 	inode = dentry->d_inode;
@@ -1117,3 +1149,85 @@ int nonseekable_open(struct inode *inode
 }
 
 EXPORT_SYMBOL(nonseekable_open);
+
+asmlinkage long sys_lchmod(char __user * filename, mode_t mode)
+{
+	struct nameidata nd;
+	struct inode * inode;
+	int error;
+	struct iattr newattrs;
+
+	error = user_path_walk_link(filename, &nd);
+	if (error)
+		goto out;
+	inode = nd.dentry->d_inode;
+
+	error = -EROFS;
+	if (IS_RDONLY(inode))
+		goto dput_and_out;
+
+	/* NOTE: Don't forget that I_IMMUTABLE and I_APPEND
+	 * checks must be in notify change
+	 */
+
+	down(&inode->i_sem);
+	if (mode == (mode_t) -1)
+		mode = inode->i_mode;
+	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+	error = notify_change(nd.dentry, &newattrs);
+	up(&inode->i_sem);
+
+dput_and_out:
+	path_release(&nd);
+out:
+	return error;
+}
+
+asmlinkage long sys_lutime(char __user * filename,
+		struct utimbuf __user * times)
+{
+	int error;
+	struct nameidata nd;
+	struct inode * inode;
+	struct iattr newattrs;
+
+	error = user_path_walk_link(filename, &nd);
+	if (error)
+		goto out;
+	inode = nd.dentry->d_inode;
+
+	error = -EROFS;
+	if (IS_RDONLY(inode))
+		goto dput_and_out;
+
+	/* NOTE: Don't forget that I_IMMUTABLE and I_APPEND
+	 * checks must be in notify change
+	 */
+
+	/* Don't worry, the checks are done in inode_change_ok() */
+	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
+	if (times) {
+		error = get_user(newattrs.ia_atime.tv_sec, &times->actime);
+		newattrs.ia_atime.tv_nsec = 0;
+		if (!error)
+			error = get_user(newattrs.ia_mtime.tv_sec,
+					&times->modtime);
+		newattrs.ia_mtime.tv_nsec = 0;
+		if (error)
+			goto dput_and_out;
+
+		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+	} else {
+		if (current->fsuid != inode->i_uid &&
+		    (error = permission(inode, MAY_WRITE, NULL, NULL)) != 0)
+			goto dput_and_out;
+	}
+	down(&inode->i_sem);
+	error = notify_change(nd.dentry, &newattrs);
+	up(&inode->i_sem);
+dput_and_out:
+	path_release(&nd);
+out:
+	return error;
+}
diff -Nurap linux-2.6.9-100.orig/fs/partitions/check.c linux-2.6.9-ve023stab054/fs/partitions/check.c
--- linux-2.6.9-100.orig/fs/partitions/check.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/partitions/check.c	2011-06-15 19:26:22.000000000 +0400
@@ -128,6 +128,7 @@ char *disk_name(struct gendisk *hd, int 
 
 	return buf;
 }
+EXPORT_SYMBOL(disk_name);
 
 const char *bdevname(struct block_device *bdev, char *buf)
 {
@@ -205,6 +206,7 @@ check_partition(struct gendisk *hd, stru
 struct part_attribute {
 	struct attribute attr;
 	ssize_t (*show)(struct hd_struct *,char *);
+	ssize_t (*store)(struct hd_struct *,const char *, size_t);
 };
 
 static ssize_t 
@@ -214,14 +216,33 @@ part_attr_show(struct kobject * kobj, st
 	struct part_attribute * part_attr = container_of(attr,struct part_attribute,attr);
 	ssize_t ret = 0;
 	if (part_attr->show)
-		ret = part_attr->show(p,page);
+		ret = part_attr->show(p, page);
+	return ret;
+}
+static ssize_t
+part_attr_store(struct kobject * kobj, struct attribute * attr,
+	const char *page, size_t count)
+{
+	struct hd_struct * p = container_of(kobj,struct hd_struct,kobj);
+	struct part_attribute * part_attr = container_of(attr,struct part_attribute,attr);
+	ssize_t ret = 0;
+
+	if (part_attr->store)
+		ret = part_attr->store(p, page, count);
 	return ret;
 }
 
 static struct sysfs_ops part_sysfs_ops = {
 	.show	=	part_attr_show,
+	.store	=	part_attr_store,
 };
 
+static ssize_t part_uevent_store(struct hd_struct * p,
+	const char *page, size_t count)
+{
+	kobject_hotplug(&p->kobj, KOBJ_ADD);
+	return count;
+}
 static ssize_t part_dev_read(struct hd_struct * p, char *page)
 {
 	struct gendisk *disk = container_of(p->kobj.parent,struct gendisk,kobj);
@@ -260,6 +281,10 @@ static ssize_t part_stats_read(struct hd
 	rcu_read_unlock();
 	return res;
 }
+static struct part_attribute part_attr_uevent = {
+	.attr = {.name = "uevent", .mode = S_IWUSR },
+	.store  = part_uevent_store
+};
 static struct part_attribute part_attr_dev = {
 	.attr = {.name = "dev", .mode = S_IRUGO },
 	.show	= part_dev_read
@@ -278,6 +303,7 @@ static struct part_attribute part_attr_s
 };
 
 static struct attribute * default_attrs[] = {
+	&part_attr_uevent.attr,
 	&part_attr_dev.attr,
 	&part_attr_start.attr,
 	&part_attr_size.attr,
diff -Nurap linux-2.6.9-100.orig/fs/pipe.c linux-2.6.9-ve023stab054/fs/pipe.c
--- linux-2.6.9-100.orig/fs/pipe.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/pipe.c	2011-06-15 19:26:19.000000000 +0400
@@ -548,7 +548,7 @@ struct inode* pipe_new(struct inode* ino
 {
 	unsigned long page;
 
-	page = __get_free_page(GFP_USER);
+	page = __get_free_page(GFP_USER_UBC);
 	if (!page)
 		return NULL;
 
diff -Nurap linux-2.6.9-100.orig/fs/proc/array.c linux-2.6.9-ve023stab054/fs/proc/array.c
--- linux-2.6.9-100.orig/fs/proc/array.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/proc/array.c	2011-06-15 19:26:20.000000000 +0400
@@ -62,6 +62,7 @@
 #include <linux/string.h>
 #include <linux/mman.h>
 #include <linux/proc_fs.h>
+#include <linux/fairsched.h>
 #include <linux/ioport.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
@@ -75,6 +76,7 @@
 #include <linux/times.h>
 #include <linux/resource.h>
 #include <linux/ptrace.h>
+#include <ub/beancounter.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -159,8 +161,13 @@ static inline char * task_state(struct t
 {
 	struct group_info *group_info;
 	int g;
+	pid_t pid, ppid, tgid;
+
+	pid = get_task_pid(p);
+	tgid = get_task_tgid(p);
 
 	read_lock(&tasklist_lock);
+	ppid = get_task_ppid(p);
 	buffer += sprintf(buffer,
 		"State:\t%s\n"
 		"SleepAVG:\t%lu%%\n"
@@ -168,13 +175,19 @@ static inline char * task_state(struct t
 		"Pid:\t%d\n"
 		"PPid:\t%d\n"
 		"TracerPid:\t%d\n"
+#ifdef CONFIG_FAIRSCHED
+		"FNid:\t%d\n"
+#endif
 		"Uid:\t%d\t%d\t%d\t%d\n"
 		"Gid:\t%d\t%d\t%d\t%d\n",
 		get_task_state(p),
 		(p->sleep_avg/1024)*100/(1020000000/1024),
-	       	p->tgid,
-		p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
-		pid_alive(p) && p->ptrace ? p->parent->pid : 0,
+		tgid,
+		pid, ppid,
+		pid_alive(p) && p->ptrace ? get_task_pid(p->parent) : 0,
+#ifdef CONFIG_FAIRSCHED
+		task_fairsched_node_id(p),
+#endif
 		p->uid, p->euid, p->suid, p->fsuid,
 		p->gid, p->egid, p->sgid, p->fsgid);
 	read_unlock(&tasklist_lock);
@@ -193,6 +206,20 @@ static inline char * task_state(struct t
 	put_group_info(group_info);
 
 	buffer += sprintf(buffer, "\n");
+
+#ifdef CONFIG_VE
+	buffer += sprintf(buffer,
+			"envID:\t%d\n"
+			"VPid:\t%d\n"
+			"PNState:\t%u\n"
+			"StopState:\t%u\n"
+			"SigSuspState:\t%u\n",
+			VE_TASK_INFO(p)->owner_env->veid,
+			virt_pid(p),
+			p->pn_state,
+			p->stopped_state,
+			p->sigsuspend_state);
+#endif
 	return buffer;
 }
 
@@ -238,7 +265,7 @@ static void collect_sigign_sigcatch(stru
 
 static inline char * task_sig(struct task_struct *p, char *buffer)
 {
-	sigset_t pending, shpending, blocked, ignored, caught;
+	sigset_t pending, shpending, blocked, ignored, caught, saved;
 	int num_threads = 0;
 
 	sigemptyset(&pending);
@@ -246,6 +273,7 @@ static inline char * task_sig(struct tas
 	sigemptyset(&blocked);
 	sigemptyset(&ignored);
 	sigemptyset(&caught);
+	sigemptyset(&saved);
 
 	/* Gather all the data with the appropriate locks held */
 	read_lock(&tasklist_lock);
@@ -254,6 +282,7 @@ static inline char * task_sig(struct tas
 		pending = p->pending.signal;
 		shpending = p->signal->shared_pending.signal;
 		blocked = p->blocked;
+		saved = p->saved_sigset;
 		collect_sigign_sigcatch(p, &ignored, &caught);
 		num_threads = atomic_read(&p->signal->count);
 		spin_unlock_irq(&p->sighand->siglock);
@@ -268,6 +297,7 @@ static inline char * task_sig(struct tas
 	buffer = render_sigset_t("SigBlk:\t", &blocked, buffer);
 	buffer = render_sigset_t("SigIgn:\t", &ignored, buffer);
 	buffer = render_sigset_t("SigCgt:\t", &caught, buffer);
+	buffer = render_sigset_t("SigSvd:\t", &saved, buffer);
 
 	return buffer;
 }
@@ -282,6 +312,24 @@ static inline char *task_cap(struct task
 			    cap_t(p->cap_effective));
 }
 
+#ifdef CONFIG_USER_RESOURCE
+static inline char *task_show_ub(struct task_struct *p, char *buffer)
+{
+	char ub_info[64];
+
+	print_ub_uid(get_task_ub(p), ub_info, sizeof(ub_info));
+	buffer += sprintf(buffer, "TaskUB:\t%s\n", ub_info);
+	task_lock(p);
+	if (p->mm != NULL)
+		print_ub_uid(mm_ub(p->mm), ub_info, sizeof(ub_info));
+	else
+		strcpy(ub_info, "N/A");
+	task_unlock(p);
+	buffer += sprintf(buffer, "MMUB:\t%s\n", ub_info);
+	return buffer;
+}
+#endif
+
 int proc_pid_status(struct task_struct *task, char * buffer)
 {
 	char * orig = buffer;
@@ -299,6 +347,9 @@ int proc_pid_status(struct task_struct *
 #if defined(CONFIG_ARCH_S390)
 	buffer = task_show_regs(task, buffer);
 #endif
+#ifdef CONFIG_USER_RESOURCE
+	buffer = task_show_ub(task, buffer);
+#endif
 	return buffer - orig;
 }
 
@@ -319,6 +370,8 @@ static int do_task_stat(struct task_stru
 	unsigned long  min_flt = 0,  maj_flt = 0,  utime = 0,  stime = 0;
 	struct task_struct *t;
 	char tcomm[sizeof(task->comm)];
+	char mm_ub_info[64];
+	char task_ub_info[64];
 
 	state = *get_task_state(task);
 	vsize = eip = esp = 0;
@@ -358,11 +411,11 @@ static int do_task_stat(struct task_stru
 	}
 	if (task->signal) {
 		if (task->signal->tty) {
-			tty_pgrp = task->signal->tty->pgrp;
+			tty_pgrp = pid_type_to_vpid(PIDTYPE_PGID, task->signal->tty->pgrp);
 			tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
 		}
-		pgid = process_group(task);
-		sid = task->signal->session;
+		pgid = get_task_pgid(task);
+		sid = get_task_sid(task);
 		cmin_flt = task->signal->cmin_flt;
 		cmaj_flt = task->signal->cmaj_flt;
 		cutime = task->signal->cutime;
@@ -374,7 +427,7 @@ static int do_task_stat(struct task_stru
 			stime += task->signal->stime;
 		}
 	}
-	ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
+	ppid = get_task_ppid(task);
 	read_unlock(&tasklist_lock);
 
 	if (permitted && (!whole || num_threads<2) &&
@@ -394,17 +447,34 @@ static int do_task_stat(struct task_stru
 	priority = task_prio(task);
 	nice = task_nice(task);
 
+#ifndef CONFIG_VE
 	/* Temporary variable needed for gcc-2.96 */
 	/* convert timespec -> nsec*/
 	start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
 				+ task->start_time.tv_nsec;
 	/* convert nsec -> ticks */
 	start_time = nsec_to_clock_t(start_time);
+#else
+	start_time = ve_relative_clock(&task->start_time);
+#endif
+
+#ifdef CONFIG_USER_RESOURCE
+	print_ub_uid(get_task_ub(task), task_ub_info, sizeof(task_ub_info));
+	if (mm != NULL)
+		print_ub_uid(mm_ub(mm), mm_ub_info, sizeof(mm_ub_info));
+	else
+		strcpy(mm_ub_info, "N/A");
+#else
+	strcpy(task_ub_info, "0");
+	strcpy(mm_ub_info, "0");
+#endif
 
 	res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
 %lu %lu %lu %lu %lu %lu %lu %ld %ld %d %lu %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
-		task->pid,
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu \
+0 0 0 0 0 0 0 0 %d %u \
+%s %s\n",
+		get_task_pid(task),
 		tcomm,
 		state,
 		ppid,
@@ -448,7 +518,11 @@ static int do_task_stat(struct task_stru
 		task->exit_signal,
 		task_cpu(task),
 		task->rt_priority,
-		task->policy);
+		task->policy,
+		virt_pid(task),
+		VEID(VE_TASK_INFO(task)->owner_env),
+		task_ub_info,
+		mm_ub_info);
 	if(mm)
 		mmput(mm);
 	return res;
diff -Nurap linux-2.6.9-100.orig/fs/proc/base.c linux-2.6.9-ve023stab054/fs/proc/base.c
--- linux-2.6.9-100.orig/fs/proc/base.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/proc/base.c	2011-06-15 19:26:20.000000000 +0400
@@ -294,22 +294,25 @@ static int proc_fd_link(struct inode *in
 	struct files_struct *files;
 	struct file *file;
 	int fd = proc_type(inode) - PROC_TID_FD_DIR;
+	int err = -ENOENT;
 
 	files = get_files_struct(task);
 	if (files) {
 		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
-			*mnt = mntget(file->f_vfsmnt);
-			*dentry = dget(file->f_dentry);
-			spin_unlock(&files->file_lock);
-			put_files_struct(files);
-			return 0;
+			if (d_root_check(file->f_dentry, file->f_vfsmnt)) {
+				err = -EACCES;
+			} else {
+				*mnt = mntget(file->f_vfsmnt);
+				*dentry = dget(file->f_dentry);
+				err = 0;
+			}
 		}
 		spin_unlock(&files->file_lock);
 		put_files_struct(files);
 	}
-	return -ENOENT;
+	return err;
 }
 
 static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
@@ -326,13 +329,16 @@ static int proc_exe_link(struct inode *i
 	while (vma) {
 		if ((vma->vm_flags & VM_EXECUTABLE) && 
 		    vma->vm_file) {
-			*mnt = mntget(vma->vm_file->f_vfsmnt);
-			*dentry = dget(vma->vm_file->f_dentry);
-			result = 0;
+			result = d_root_check(vma->vm_file->f_dentry,
+					vma->vm_file->f_vfsmnt);
+			if (!result) {
+				*mnt = mntget(vma->vm_file->f_vfsmnt);
+				*dentry = dget(vma->vm_file->f_dentry);
+			}
 			break;
 		}
 		vma = vma->vm_next;
-	}
+ 	}
 	up_read(&mm->mmap_sem);
 	mmput(mm);
 out:
@@ -350,10 +356,12 @@ static int proc_cwd_link(struct inode *i
 	task_unlock(proc_task(inode));
 	if (fs) {
 		read_lock(&fs->lock);
-		*mnt = mntget(fs->pwdmnt);
-		*dentry = dget(fs->pwd);
+		result = d_root_check(fs->pwd, fs->pwdmnt);
+		if (!result) {
+			*mnt = mntget(fs->pwdmnt);
+			*dentry = dget(fs->pwd);
+		}
 		read_unlock(&fs->lock);
-		result = 0;
 		put_fs_struct(fs);
 	}
 	return result;
@@ -402,6 +410,11 @@ int __may_ptrace_attach(struct task_stru
 	rmb();
 	if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE))
 		goto out;
+	if (task->mm->vps_dumpable != 1 && !ve_is_super(get_exec_env()))
+		goto out;
+	/* optional: defensive measure */
+	if (!ve_accessible(VE_TASK_INFO(task)->owner_env, get_exec_env()))
+		goto out;
 	if (security_ptrace(current, task))
 		goto out;
 
@@ -572,9 +585,10 @@ out:
 	goto exit;
 }
 
-static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int proc_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *exec_perm)
 {
-	if (vfs_permission(inode, mask) != 0)
+	if (vfs_permission(inode, mask, exec_perm) != 0)
 		return -EACCES;
 	return proc_check_root(inode);
 }
@@ -1334,6 +1348,10 @@ static struct inode *proc_pid_make_inode
 	struct inode * inode;
 	struct proc_inode *ei;
 
+	if (!ve_accessible(VE_TASK_INFO(task)->owner_env,
+			   VE_OWNER_FSTYPE(sb->s_type)))
+		return NULL;
+
 	/* We need a new inode */
 	
 	inode = new_inode(sb);
@@ -1439,6 +1457,10 @@ static void pid_base_iput(struct dentry 
 	spin_lock(&task->proc_lock);
 	if (task->proc_dentry == dentry)
 		task->proc_dentry = NULL;
+#ifdef CONFIG_VE
+	if (VE_TASK_INFO(task)->glob_proc_dentry == dentry)
+		VE_TASK_INFO(task)->glob_proc_dentry = NULL;
+#endif
 	spin_unlock(&task->proc_lock);
 	iput(inode);
 }
@@ -1909,14 +1931,14 @@ static int proc_self_readlink(struct den
 			      int buflen)
 {
 	char tmp[30];
-	sprintf(tmp, "%d", current->tgid);
+	sprintf(tmp, "%d", get_task_tgid(current));
 	return vfs_readlink(dentry,buffer,buflen,tmp);
 }
 
 static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	char tmp[30];
-	sprintf(tmp, "%d", current->tgid);
+	sprintf(tmp, "%d", get_task_tgid(current));
 	return vfs_follow_link(nd,tmp);
 }	
 
@@ -1941,24 +1963,33 @@ static struct inode_operations proc_self
  *   of PIDTYPE_PID.
  */
 
-struct dentry *proc_pid_unhash(struct task_struct *p)
+struct dentry *__proc_pid_unhash(struct task_struct *p, struct dentry *proc_dentry)
 {
-	struct dentry *proc_dentry;
-
-	proc_dentry = p->proc_dentry;
 	if (proc_dentry != NULL) {
 
 		spin_lock(&dcache_lock);
+		spin_lock(&proc_dentry->d_lock);
 		if (!d_unhashed(proc_dentry)) {
 			dget_locked(proc_dentry);
 			__d_drop(proc_dentry);
-		} else
+			spin_unlock(&proc_dentry->d_lock);
+		} else {
+			spin_unlock(&proc_dentry->d_lock);
 			proc_dentry = NULL;
+		}
 		spin_unlock(&dcache_lock);
 	}
 	return proc_dentry;
 }
 
+void proc_pid_unhash(struct task_struct *p, struct dentry *pd[2])
+{
+	pd[0] = __proc_pid_unhash(p, p->proc_dentry);
+#ifdef CONFIG_VE
+	pd[1] = __proc_pid_unhash(p, VE_TASK_INFO(p)->glob_proc_dentry);
+#endif
+}
+
 /**
  * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries
  * @proc_entry: directoy to prune.
@@ -1966,7 +1997,7 @@ struct dentry *proc_pid_unhash(struct ta
  * Shrink the /proc directory that was used by the just killed thread.
  */
 	
-void proc_pid_flush(struct dentry *proc_dentry)
+void __proc_pid_flush(struct dentry *proc_dentry)
 {
 	might_sleep();
 	if(proc_dentry != NULL) {
@@ -1975,6 +2006,48 @@ void proc_pid_flush(struct dentry *proc_
 	}
 }
 
+void proc_pid_flush(struct dentry *proc_dentry[2])
+{
+	__proc_pid_flush(proc_dentry[0]);
+#ifdef CONFIG_VE
+	__proc_pid_flush(proc_dentry[1]);
+#endif
+}
+
+/**
+ * proc_dentry_set - fill task->proc_dentry entry
+ * @task: task_struct to fill the entry in.
+ * @dentry: proc_dentry to be set.
+ * @pd: proc entries to be flushed if task exited.
+ * Returns @dentry if the @task is still alive, NULL otherwise.
+ *
+ * Should be called with task_struct reference held,
+ * with already associated inode to the @dentry.
+ */
+static struct dentry* proc_dentry_set(struct task_struct *task, struct dentry *dentry, struct dentry *pd[2])
+{
+	int died;
+
+	died = 0;
+	spin_lock(&task->proc_lock);
+#ifdef CONFIG_VE
+	if (ve_is_super(VE_OWNER_FSTYPE(dentry->d_inode->i_sb->s_type)))
+		VE_TASK_INFO(task)->glob_proc_dentry = dentry;
+	else
+		task->proc_dentry = dentry;
+#else
+	task->proc_dentry = dentry;
+#endif
+
+	if (!pid_alive(task)) {
+		proc_pid_unhash(task, pd);
+		died = 1;
+	}
+	spin_unlock(&task->proc_lock);
+
+	return died ? NULL : dentry;
+}
+
 /* SMP-safe */
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 {
@@ -1982,7 +2055,8 @@ struct dentry *proc_pid_lookup(struct in
 	struct inode *inode;
 	struct proc_inode *ei;
 	unsigned tgid;
-	int died;
+	struct dentry *pd[2];
+	struct dentry *alive;
 
 	if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
 		inode = new_inode(dir->i_sb);
@@ -2004,7 +2078,26 @@ struct dentry *proc_pid_lookup(struct in
 		goto out;
 
 	read_lock(&tasklist_lock);
-	task = find_task_by_pid(tgid);
+	task = find_task_by_pid_ve(tgid);
+	/* In theory we are allowed to lookup both /proc/VIRT_PID and
+	 * /proc/GLOBAL_PID inside VE. However, current /proc implementation
+	 * cannot maintain two references to one task, so that we have
+	 * to prohibit /proc/GLOBAL_PID.
+	 */
+	if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tgid)) {
+		/* However, VE_ENTERed tasks are exception, they use global
+		 * pids.
+		 */
+		if (virt_pid(task) != tgid)
+			task = NULL;
+	}
+
+	/* Don't create /proc/<pid> entry for non-leader threads.
+	 * Their proc_entry should be /proc/<tgid>/task/<pid>
+	 */
+	if (task && !thread_group_leader(task))
+		task = NULL;
+
 	if (task)
 		get_task_struct(task);
 	read_unlock(&tasklist_lock);
@@ -2030,21 +2123,16 @@ struct dentry *proc_pid_lookup(struct in
 
 	dentry->d_op = &pid_base_dentry_operations;
 
-	died = 0;
 	d_add(dentry, inode);
-	spin_lock(&task->proc_lock);
-	task->proc_dentry = dentry;
-	if (!pid_alive(task)) {
-		dentry = proc_pid_unhash(task);
-		died = 1;
-	}
-	spin_unlock(&task->proc_lock);
+
+	alive = proc_dentry_set(task, dentry, pd);
 
 	put_task_struct(task);
-	if (died) {
-		proc_pid_flush(dentry);
+	if (!alive) {
+		proc_pid_flush(pd);
 		goto out;
 	}
+	
 	return NULL;
 out:
 	return ERR_PTR(-ENOENT);
@@ -2057,13 +2145,20 @@ static struct dentry *proc_task_lookup(s
 	struct task_struct *leader = proc_task(dir);
 	struct inode *inode;
 	unsigned tid;
+	struct dentry *pd[2];
+	struct dentry *alive;
 
 	tid = name_to_int(dentry);
 	if (tid == ~0U)
 		goto out;
 
 	read_lock(&tasklist_lock);
-	task = find_task_by_pid(tid);
+	task = find_task_by_pid_ve(tid);
+	/* See comment above in similar place. */
+	if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tid)) {
+		if (virt_pid(task) != tid)
+			task = NULL;
+	}
 	if (task)
 		get_task_struct(task);
 	read_unlock(&tasklist_lock);
@@ -2091,7 +2186,19 @@ static struct dentry *proc_task_lookup(s
 
 	d_add(dentry, inode);
 
+	/* proc_entry for non-leader threads should be
+	 * /proc/<tgid>/task/<pid>, not /proc/<pid>.
+	 */
+	alive = dentry;
+	if (!thread_group_leader(task))
+		alive = proc_dentry_set(task, dentry, pd);
+
 	put_task_struct(task);
+	if (!alive) {
+		proc_pid_flush(pd);
+		goto out;
+	}
+	
 	return NULL;
 out_drop_task:
 	put_task_struct(task);
@@ -2104,27 +2211,35 @@ out:
  * tasklist lock while doing this, and we must release it before
  * we actually do the filldir itself, so we use a temp buffer..
  */
-static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
+static int get_tgid_list(int index, unsigned long version, unsigned int *tgids,
+		struct ve_struct *owner)
 {
 	struct task_struct *p;
 	int nr_tgids = 0;
 
 	index--;
 	read_lock(&tasklist_lock);
+	if (list_empty(&owner->vetask_lh))
+		goto out;
 	p = NULL;
 	if (version) {
-		p = find_task_by_pid(version);
-		if (p && !thread_group_leader(p))
+		struct ve_struct *oldve;
+
+		oldve = set_exec_env(owner);
+		p = find_task_by_pid_ve(version);
+		(void)set_exec_env(oldve);
+
+		if (p != NULL && !thread_group_leader(p))
 			p = NULL;
 	}
 
 	if (p)
 		index = 0;
 	else
-		p = next_task(&init_task);
+		p = __first_task_ve(owner);
 
-	for ( ; p != &init_task; p = next_task(p)) {
-		int tgid = p->pid;
+	for ( ; p != NULL; p = __next_task_ve(owner, p)) {
+		int tgid = get_task_pid_ve(p, owner);
 		if (!pid_alive(p))
 			continue;
 		if (--index >= 0)
@@ -2134,6 +2249,7 @@ static int get_tgid_list(int index, unsi
 		if (nr_tgids >= PROC_MAXPIDS)
 			break;
 	}
+out:
 	read_unlock(&tasklist_lock);
 	return nr_tgids;
 }
@@ -2157,7 +2273,7 @@ static int get_tid_list(int index, unsig
 	 * via next_thread().
 	 */
 	if (pid_alive(task)) do {
-		int tid = task->pid;
+		int tid = get_task_pid(task);
 
 		if (--index >= 0)
 			continue;
@@ -2171,74 +2287,62 @@ static int get_tid_list(int index, unsig
 	return nr_tids;
 }
 
-/*
- * Find the first task with tgid >= tgid
- *
- */
-static struct task_struct *next_tgid(unsigned int tgid)
-{
-	struct task_struct *task;
-	struct pid *pid;
-
-	read_lock(&tasklist_lock);
-retry:
-	task = NULL;
-	pid = find_ge_pid(tgid);
-	if (pid) {
-		tgid = pid->nr + 1;
-		task = pid_task(&pid->pid_list, PIDTYPE_PID);
-		/* What we to know is if the pid we have find is the
-		 * pid of a thread_group_leader.  Testing for task
-		 * being a thread_group_leader is the obvious thing
-		 * todo but there is a window when it fails, due to
-		 * the pid transfer logic in de_thread.
-		 *
-		 * So we perform the straight forward test of seeing
-		 * if the pid we have found is the pid of a thread
-		 * group leader, and don't worry if the task we have
-		 * found doesn't happen to be a thread group leader.
-		 * As we don't care in the case of readdir.
-		 */
-		if (!task || !thread_group_leader(task))
-			goto retry;
-		get_task_struct(task);
-	}
-	read_unlock(&tasklist_lock);
-	return task;
-}
-
-#define TGID_OFFSET (FIRST_PROCESS_ENTRY + (1 /* /proc/self */))
-
 /* for the /proc/ directory itself, after non-process stuff has been done */
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
+	unsigned int tgid_array[PROC_MAXPIDS];
 	char buf[PROC_NUMBUF];
 	unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-	struct task_struct *task;
-	int tgid;
+	unsigned int nr_tgids, i;
+	int next_tgid;
 
-	if (nr < 1) {
+	if (!nr) {
 		ino_t ino = fake_ino(0,PROC_TGID_INO);
 		if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
 			return 0;
 		filp->f_pos++;
+		nr++;
 	}
 
-	tgid = filp->f_pos - TGID_OFFSET;
-	for (task = next_tgid(tgid); task; task = next_tgid(tgid + 1)) {
-		int len;
-		ino_t ino;
-		tgid = task->pid;
-		filp->f_pos = tgid + TGID_OFFSET;
-		len = snprintf(buf, sizeof(buf), "%d", tgid);
-		ino = fake_ino(tgid, PROC_TGID_INO);
-		if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) {
-			put_task_struct(task);
-			goto out;
+	/* f_version caches the tgid value that the last readdir call couldn't
+	 * return. lseek aka telldir automagically resets f_version to 0.
+	 */
+	next_tgid = filp->f_version;
+	filp->f_version = 0;
+	for (;;) {
+		nr_tgids = get_tgid_list(nr, next_tgid, tgid_array,
+			VE_OWNER_FSTYPE(filp->f_dentry->d_sb->s_type));
+		if (!nr_tgids) {
+			/* no more entries ! */
+			break;
+		}
+		next_tgid = 0;
+
+		/* do not use the last found pid, reserve it for next_tgid */
+		if (nr_tgids == PROC_MAXPIDS) {
+			nr_tgids--;
+			next_tgid = tgid_array[nr_tgids];
+		}
+
+		for (i=0;i<nr_tgids;i++) {
+			int tgid = tgid_array[i];
+			ino_t ino = fake_ino(tgid,PROC_TGID_INO);
+			unsigned long j = PROC_NUMBUF;
+
+			do
+				buf[--j] = '0' + (tgid % 10);
+			while ((tgid /= 10) != 0);
+
+			if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) {
+				/* returning this tgid failed, save it as the first
+				 * pid for the next readdir call */
+				filp->f_version = tgid_array[i];
+				goto out;
+			}
+			filp->f_pos++;
+			nr++;
 		}
-		put_task_struct(task);
 	}
-	filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
 out:
 	return 0;
 }
diff -Nurap linux-2.6.9-100.orig/fs/proc/generic.c linux-2.6.9-ve023stab054/fs/proc/generic.c
--- linux-2.6.9-100.orig/fs/proc/generic.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/proc/generic.c	2011-06-15 19:26:20.000000000 +0400
@@ -10,7 +10,9 @@
 
 #include <linux/errno.h>
 #include <linux/time.h>
+#include <linux/fs.h>
 #include <linux/proc_fs.h>
+#include <linux/ve_owner.h>
 #include <linux/stat.h>
 #include <linux/module.h>
 #include <linux/mount.h>
@@ -243,6 +245,7 @@ proc_file_lseek(struct file *file, loff_
 	return retval;
 }
 
+#ifndef CONFIG_VE
 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = dentry->d_inode;
@@ -263,9 +266,12 @@ static int proc_notify_change(struct den
 out:
 	return error;
 }
+#endif
 
 static struct inode_operations proc_file_inode_operations = {
+#ifndef CONFIG_VE
 	.setattr	= proc_notify_change,
+#endif
 };
 
 /*
@@ -273,16 +279,21 @@ static struct inode_operations proc_file
  * returns the struct proc_dir_entry for "/proc/tty/driver", and
  * returns "serial" in residual.
  */
-static int xlate_proc_name(const char *name,
-			   struct proc_dir_entry **ret, const char **residual)
+static int __xlate_proc_name(struct proc_dir_entry *root, const char *name,
+		struct proc_dir_entry **ret, const char **residual)
 {
 	const char     		*cp = name, *next;
 	struct proc_dir_entry	*de;
 	int			len;
 	int                     rtn = 0;
 
+	if (*ret != NULL) {
+		de_get(*ret);
+		return 0;
+	}
+
 	spin_lock(&proc_subdir_lock);
-	de = &proc_root;
+	de = root;
 	while (1) {
 		next = strchr(cp, '/');
 		if (!next)
@@ -300,12 +311,29 @@ static int xlate_proc_name(const char *n
 		cp += len + 1;
 	}
 	*residual = cp;
-	*ret = de;
+	*ret = de_get(de);
 out:
 	spin_unlock(&proc_subdir_lock);
 	return rtn;
 }
 
+#ifndef CONFIG_VE
+#define xlate_proc_loc_name xlate_proc_name
+#else
+static int xlate_proc_loc_name(const char *name,
+			   struct proc_dir_entry **ret, const char **residual)
+{
+	return __xlate_proc_name(get_exec_env()->proc_root,
+			name, ret, residual);
+}
+#endif
+
+static int xlate_proc_name(const char *name,
+		struct proc_dir_entry **ret, const char **residual)
+{
+	return __xlate_proc_name(&proc_root, name, ret, residual);
+}
+
 static DEFINE_IDR(proc_inum_idr);
 static spinlock_t proc_inum_lock = SPIN_LOCK_UNLOCKED; /* protects the above */
 
@@ -381,38 +409,122 @@ static struct dentry_operations proc_den
  * Don't create negative dentries here, return -ENOENT by hand
  * instead.
  */
+
+static inline struct proc_dir_entry *__proc_lookup(struct proc_dir_entry *de,
+		struct dentry *d)
+{
+	for (de = de->subdir; de; de = de->next) {
+		if (de->namelen != d->d_name.len)
+			continue;
+		if (!memcmp(d->d_name.name, de->name, de->namelen))
+			break;
+	}
+	return de_get(de);
+}
+
 struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = NULL;
-	struct proc_dir_entry * de;
+	struct proc_dir_entry *lde, *gde;
 	int error = -ENOENT;
 
 	lock_kernel();
 	spin_lock(&proc_subdir_lock);
-	de = PDE(dir);
-	if (de) {
-		for (de = de->subdir; de ; de = de->next) {
-			if (de->namelen != dentry->d_name.len)
-				continue;
-			if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
-				unsigned int ino = de->low_ino;
-
-				spin_unlock(&proc_subdir_lock);
-				error = -EINVAL;
-				inode = proc_get_inode(dir->i_sb, ino, de);
-				spin_lock(&proc_subdir_lock);
-				break;
-			}
-		}
+	lde = LPDE(dir);
+	if (!lde) {
+		spin_unlock(&proc_subdir_lock);
+		goto out;
 	}
+
+	lde = __proc_lookup(lde, dentry);
+	if (lde && !try_module_get(lde->owner)) {
+		de_put(lde);
+		lde = NULL;
+	}
+#ifdef CONFIG_VE
+	gde = GPDE(dir);
+	if (gde)
+		gde = __proc_lookup(gde, dentry);
+	if (!lde && gde && !try_module_get(gde->owner)) {
+		de_put(gde);
+		gde = NULL;
+	}
+#else
+	gde = NULL;
+#endif
 	spin_unlock(&proc_subdir_lock);
-	unlock_kernel();
+	/*
+	 * There are following possible cases after lookup:
+	 *
+	 * lde		gde
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 * NULL		NULL		ENOENT
+	 * loc		NULL		found in local tree
+	 * loc		glob		found in both trees
+	 * NULL		glob		found in global tree
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 *
+	 * We initialized inode as follows after lookup:
+	 *
+	 * inode->lde	inode->gde
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 * loc		NULL		in local tree
+	 * loc		glob		both trees
+	 * glob		glob		global tree
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 * i.e. inode->lde is always initialized
+	 */
+
+	if (lde == NULL && gde == NULL)
+		goto out;
+
+	if (lde != NULL) {
+		inode = proc_get_inode(dir->i_sb, lde->low_ino, lde);
+	} else {
+		inode = proc_get_inode(dir->i_sb, gde->low_ino, gde);
+	}
 
 	if (inode) {
+#ifdef CONFIG_VE
+		GPDE(inode) = gde;
+		if (gde) {
+			de_get(gde);
+			/* we have taken a ref above already */
+			__module_get(gde->owner);
+		}
+		/* if dentry is found in both trees and it is a directory
+		 * then inode's nlink count must be altered, because local
+		 * and global subtrees may differ.
+		 * on the other hand, they may intersect, so actual nlink
+		 * value is difficult to calculate - upper estimate is used
+		 * instead of it.
+		 * dentry found in global tree only must not be writable
+		 * in non-super ve.
+		 */
+		if (lde && gde && lde != gde && gde->nlink > 1)
+			inode->i_nlink += gde->nlink - 2;
+		if (lde == NULL && !ve_is_super(
+					VE_OWNER_FSTYPE(dir->i_sb->s_type)))
+			inode->i_mode &= ~S_IWUGO;
+#endif
+		unlock_kernel();
+		de_put(lde);
+		de_put(gde);
 		dentry->d_op = &proc_dentry_operations;
 		d_add(dentry, inode);
 		return NULL;
+	} else {
+		if (lde)
+			module_put(lde->owner);
+		else
+			module_put(gde->owner);
 	}
+
+	de_put(lde);
+	de_put(gde);
+
+out:
+	unlock_kernel();
 	return ERR_PTR(error);
 }
 
@@ -463,11 +575,8 @@ int proc_readdir(struct file * filp,
 			de = de->subdir;
 			i -= 2;
 			for (;;) {
-				if (!de) {
-					ret = 1;
-					spin_unlock(&proc_subdir_lock);
-					goto out;
-				}
+				if (!de)
+					goto lookup_global;
 				if (!i)
 					break;
 				de = de->next;
@@ -475,20 +584,70 @@ int proc_readdir(struct file * filp,
 			}
 
 			do {
+				struct proc_dir_entry *next;
                                /* filldir passes info to user space */
+				de_get(de);
 				spin_unlock(&proc_subdir_lock);
 				if (filldir(dirent, de->name, de->namelen, filp->f_pos,
-					    de->low_ino, de->mode >> 12) < 0)
+					    de->low_ino, de->mode >> 12) < 0) {
+					de_put(de);
 					goto out;
+				}
 				spin_lock(&proc_subdir_lock);
 				filp->f_pos++;
-				de = de->next;
+				next = de->next;
+				de_put(de);
+				de = next;
 			} while (de);
+lookup_global:
+#ifdef CONFIG_VE
+			de = GPDE(inode);
+			if (de == NULL)
+				goto done;
+
+			de = de->subdir;
+
+			while (de) {
+				struct proc_dir_entry *p;
+
+				/* check that we haven't filled this dir already */
+				for (p = LPDE(inode)->subdir; p; p = p->next) {
+					if (de->namelen != p->namelen)
+						continue;
+					if (!memcmp(de->name, p->name, p->namelen))
+						break;
+				}
+				if (p) {
+					de = de->next;
+					continue;
+				}
+				/* skip first i entries */
+				if (i > 0) {
+					i--;
+					de = de->next;
+					continue;
+				}
+
+				de_get(de);
+				spin_unlock(&proc_subdir_lock);
+				if (filldir(dirent, de->name, de->namelen, filp->f_pos,
+							de->low_ino, de->mode >> 12) < 0) {
+					de_put(de);
+					goto out;
+				}
+				spin_lock(&proc_subdir_lock);
+				filp->f_pos++;
+				p = de->next;
+				de_put(de);
+				de = p;
+			}
+done:
+#endif
 			spin_unlock(&proc_subdir_lock);
 	}
 	ret = 1;
 out:	unlock_kernel();
-	return ret;	
+	return ret;
 }
 
 /*
@@ -506,7 +665,9 @@ static struct file_operations proc_dir_o
  */
 static struct inode_operations proc_dir_inode_operations = {
 	.lookup		= proc_lookup,
+#ifndef CONFIG_VE
 	.setattr	= proc_notify_change,
+#endif
 };
 
 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
@@ -519,8 +680,13 @@ static int proc_register(struct proc_dir
 	dp->low_ino = i;
 
 	spin_lock(&proc_subdir_lock);
+	if (dir->deleted) {
+		spin_unlock(&proc_subdir_lock);
+		return -EINVAL;
+	}
+
 	dp->next = dir->subdir;
-	dp->parent = dir;
+	dp->parent = de_get(dir);
 	dir->subdir = dp;
 	spin_unlock(&proc_subdir_lock);
 
@@ -584,12 +750,13 @@ static struct proc_dir_entry *__proc_cre
 	/* make sure name is valid */
 	if (!name || !strlen(name)) goto out;
 
-	if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0)
+	if (xlate_proc_loc_name(name, parent, &fn) != 0)
 		goto out;
 	len = strlen(fn);
 
 	ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
-	if (!ent) goto out;
+	if (!ent)
+		goto out_put;
 
 	memset(ent, 0, sizeof(struct proc_dir_entry));
 	memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
@@ -597,8 +764,13 @@ static struct proc_dir_entry *__proc_cre
 	ent->namelen = len;
 	ent->mode = mode;
 	ent->nlink = nlink;
- out:
+	atomic_set(&ent->count, 1);
 	return ent;
+
+out_put:
+	de_put(*parent);
+out:
+	return NULL;
 }
 
 struct proc_dir_entry *proc_symlink(const char *name,
@@ -622,6 +794,7 @@ struct proc_dir_entry *proc_symlink(cons
 			kfree(ent);
 			ent = NULL;
 		}
+		de_put(parent);
 	}
 	return ent;
 }
@@ -640,6 +813,7 @@ struct proc_dir_entry *proc_mkdir_mode(c
 			kfree(ent);
 			ent = NULL;
 		}
+		de_put(parent);
 	}
 	return ent;
 }
@@ -678,6 +852,7 @@ struct proc_dir_entry *create_proc_entry
 			kfree(ent);
 			ent = NULL;
 		}
+		de_put(parent);
 	}
 	return ent;
 }
@@ -714,6 +889,22 @@ out:
 	return NULL;
 }
 
+struct proc_dir_entry *create_proc_glob_entry(const char *name, mode_t mode,
+					 struct proc_dir_entry *parent)
+{
+	const char *path;
+	struct proc_dir_entry *ent;
+
+	path = name;
+	if (xlate_proc_name(path, &parent, &name) != 0)
+		return NULL;
+
+	ent = create_proc_entry(name, mode, parent);
+	de_put(parent);
+	return ent;
+}
+EXPORT_SYMBOL(create_proc_glob_entry);
+
 void free_proc_entry(struct proc_dir_entry *de)
 {
 	unsigned int ino = de->low_ino;
@@ -732,17 +923,14 @@ void free_proc_entry(struct proc_dir_ent
  * Remove a /proc entry and free it if it's not currently in use.
  * If it is in use, we set the 'deleted' flag.
  */
-void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+static void __remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 {
 	struct proc_dir_entry **p;
 	struct proc_dir_entry *de;
 	const char *fn = name;
 	int len;
 
-	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
-		goto out;
 	len = strlen(fn);
-
 	spin_lock(&proc_subdir_lock);
 	for (p = &parent->subdir; *p; p=&(*p)->next ) {
 		if (!proc_match(len, fn, *p))
@@ -750,21 +938,49 @@ void remove_proc_entry(const char *name,
 		de = *p;
 		*p = de->next;
 		de->next = NULL;
+		de_put(parent);
 		if (S_ISDIR(de->mode))
 			parent->nlink--;
 		proc_kill_inodes(de);
 		de->nlink = 0;
-		BUG_ON(de->subdir);
-		if (!atomic_read(&de->count))
-			free_proc_entry(de);
-		else {
-			de->deleted = 1;
-			printk("remove_proc_entry: %s/%s busy, count=%d\n",
-				parent->name, de->name, atomic_read(&de->count));
-		}
+		WARN_ON(de->subdir);
+		de->deleted = 1;
+		de_put(de);
 		break;
 	}
 	spin_unlock(&proc_subdir_lock);
-out:
 	return;
 }
+
+void remove_proc_loc_entry(const char *name, struct proc_dir_entry *parent)
+{
+	const char *path;
+
+	path = name;
+	if (xlate_proc_loc_name(path, &parent, &name))
+		return;
+
+	__remove_proc_entry(name, parent);
+	de_put(parent);
+}
+
+void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent)
+{
+	const char *path;
+
+	path = name;
+	if (xlate_proc_name(path, &parent, &name))
+		return;
+
+	__remove_proc_entry(name, parent);
+	de_put(parent);
+}
+
+void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+{
+	remove_proc_loc_entry(name, parent);
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()))
+		remove_proc_glob_entry(name, parent);
+#endif
+}
diff -Nurap linux-2.6.9-100.orig/fs/proc/inode.c linux-2.6.9-ve023stab054/fs/proc/inode.c
--- linux-2.6.9-100.orig/fs/proc/inode.c	2011-06-09 19:22:59.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/proc/inode.c	2011-06-15 19:26:20.000000000 +0400
@@ -8,6 +8,7 @@
 #include <linux/proc_fs.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/ve_owner.h>
 #include <linux/string.h>
 #include <linux/stat.h>
 #include <linux/file.h>
@@ -22,34 +23,23 @@
 
 extern void free_proc_entry(struct proc_dir_entry *);
 
-static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
-{
-	if (de)
-		atomic_inc(&de->count);
-	return de;
-}
-
 /*
  * Decrements the use count and checks for deferred deletion.
  */
-static void de_put(struct proc_dir_entry *de)
+void de_put(struct proc_dir_entry *de)
 {
 	if (de) {	
-		lock_kernel();		
 		if (!atomic_read(&de->count)) {
 			printk("de_put: entry %s already free!\n", de->name);
-			unlock_kernel();
 			return;
 		}
 
 		if (atomic_dec_and_test(&de->count)) {
-			if (de->deleted) {
+			if (unlikely(!de->deleted))
 				printk("de_put: deferred delete of %s\n",
-					de->name);
-				free_proc_entry(de);
-			}
-		}		
-		unlock_kernel();
+						de->name);
+			free_proc_entry(de);
+		}
 	}
 }
 
@@ -68,7 +58,12 @@ static void proc_delete_inode(struct ino
 		put_task_struct(tsk);
 
 	/* Let go of any associated proc directory entry */
-	de = ei->pde;
+	de = LPDE(inode);
+	if (de) {
+		de_put(de);
+	}
+#ifdef CONFIG_VE
+	de = GPDE(inode);
 	if (de) {
 		if (de->owner) {
 			if (atomic_add_negative(1, &ei->mod_refs))
@@ -78,6 +73,7 @@ static void proc_delete_inode(struct ino
 		}
 		de_put(de);
 	}
+#endif
 	clear_inode(inode);
 }
 
@@ -105,6 +101,9 @@ static struct inode *proc_alloc_inode(st
 	atomic_set(&ei->mod_refs, -1);
 	inode = &ei->vfs_inode;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+#ifdef CONFIG_VE
+	GPDE(inode) = NULL;
+#endif
 	return inode;
 }
 
@@ -252,7 +251,9 @@ int proc_fill_super(struct super_block *
 	s->s_blocksize_bits = 10;
 	s->s_magic = PROC_SUPER_MAGIC;
 	s->s_op = &proc_sops;
-	
+
+	/* proc_root.owner == NULL, just a formal call */
+	__module_get(proc_root.owner);
 	root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
 	if (!root_inode)
 		goto out_no_root;
@@ -263,6 +264,14 @@ int proc_fill_super(struct super_block *
 	s->s_root = d_alloc_root(root_inode);
 	if (!s->s_root)
 		goto out_no_root;
+
+#ifdef CONFIG_VE
+	LPDE(root_inode) = de_get(get_exec_env()->proc_root);
+	GPDE(root_inode) = &proc_root;
+#else
+	LPDE(root_inode) = &proc_root;
+#endif
+
 	parse_options(data, &root_inode->i_uid, &root_inode->i_gid);
 	return 0;
 
diff -Nurap linux-2.6.9-100.orig/fs/proc/kcore.c linux-2.6.9-ve023stab054/fs/proc/kcore.c
--- linux-2.6.9-100.orig/fs/proc/kcore.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/proc/kcore.c	2011-06-15 19:26:19.000000000 +0400
@@ -25,7 +25,7 @@
 
 static int open_kcore(struct inode * inode, struct file * filp)
 {
-	return -EPERM;
+	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
 }
 
 static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *);
diff -Nurap linux-2.6.9-100.orig/fs/proc/kmsg.c linux-2.6.9-ve023stab054/fs/proc/kmsg.c
--- linux-2.6.9-100.orig/fs/proc/kmsg.c	2004-10-19 01:55:29.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/proc/kmsg.c	2011-06-15 19:26:19.000000000 +0400
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/poll.h>
 #include <linux/fs.h>
+#include <linux/veprintk.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -40,7 +41,7 @@ static ssize_t kmsg_read(struct file *fi
 
 static unsigned int kmsg_poll(struct file *file, poll_table *wait)
 {
-	poll_wait(file, &log_wait, wait);
+	poll_wait(file, &ve_log_wait, wait);
 	if (do_syslog(9, NULL, 0))
 		return POLLIN | POLLRDNORM;
 	return 0;
diff -Nurap linux-2.6.9-100.orig/fs/proc/proc_misc.c linux-2.6.9-ve023stab054/fs/proc/proc_misc.c
--- linux-2.6.9-100.orig/fs/proc/proc_misc.c	2011-06-09 19:22:53.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/proc/proc_misc.c	2011-06-15 19:26:22.000000000 +0400
@@ -31,6 +31,7 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
+#include <linux/virtinfo.h>
 #include <linux/smp.h>
 #include <linux/signal.h>
 #include <linux/module.h>
@@ -44,14 +45,17 @@
 #include <linux/jiffies.h>
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
+#include <linux/version.h>
+#include <linux/compile.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
 #include <asm/tlb.h>
 #include <asm/div64.h>
+#ifdef CONFIG_FAIRSCHED
+#include <linux/fairsched.h>
+#endif
 
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 /*
  * Warning: stuff below (imported functions) assumes that its output will fit
  * into one page. For some of those functions it may be wrong. Moreover, we
@@ -83,15 +87,33 @@ static int loadavg_read_proc(char *page,
 {
 	int a, b, c;
 	int len;
-
-	a = avenrun[0] + (FIXED_1/200);
-	b = avenrun[1] + (FIXED_1/200);
-	c = avenrun[2] + (FIXED_1/200);
+	unsigned long __nr_running;
+	int __nr_threads;
+	unsigned long *__avenrun;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+
+	if (ve_is_super(ve)) {
+		__avenrun = &avenrun[0];
+		__nr_running = nr_running();
+		__nr_threads = nr_threads;
+	} 
+#ifdef CONFIG_VE
+	else {
+		__avenrun = &ve->avenrun[0];
+		__nr_running = nr_running_ve(ve); 
+		__nr_threads = atomic_read(&ve->pcounter);
+	}
+#endif
+	a = __avenrun[0] + (FIXED_1/200);
+	b = __avenrun[1] + (FIXED_1/200);
+	c = __avenrun[2] + (FIXED_1/200);
 	len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
 		LOAD_INT(a), LOAD_FRAC(a),
 		LOAD_INT(b), LOAD_FRAC(b),
 		LOAD_INT(c), LOAD_FRAC(c),
-		nr_running(), nr_threads, last_pid);
+		__nr_running, __nr_threads, last_pid);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
@@ -139,6 +161,13 @@ static int uptime_read_proc(char *page, 
 	u64 idle_jiffies = init_task.utime + init_task.stime;
 
 	do_posix_clock_monotonic_gettime(&uptime);
+#ifdef CONFIG_VE
+	if (!ve_is_super(get_exec_env())) {
+		set_normalized_timespec(&uptime,
+		      uptime.tv_sec - get_exec_env()->start_timespec.tv_sec,
+		      uptime.tv_nsec - get_exec_env()->start_timespec.tv_nsec);
+	}
+#endif
 	jiffies_to_timespec(idle_jiffies, &idle);
 	len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
 			(unsigned long) uptime.tv_sec,
@@ -152,34 +181,36 @@ static int uptime_read_proc(char *page, 
 static int meminfo_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
-	struct sysinfo i;
+	struct meminfo mi;
 	int len;
-	struct page_state ps;
-	unsigned long inactive;
-	unsigned long active;
-	unsigned long free;
-	unsigned long vmtot;
-	unsigned long committed;
-	unsigned long allowed;
+	unsigned long dummy;
 	struct vmalloc_info vmi;
 
-	get_page_state(&ps);
-	get_zone_counts(&active, &inactive, &free);
+	get_page_state(&mi.ps);
+	get_zone_counts(&mi.active, &mi.inactive, &dummy);
 
 /*
  * display in kilobytes.
  */
 #define K(x) ((x) << (PAGE_SHIFT - 10))
-	si_meminfo(&i);
-	si_swapinfo(&i);
-	committed = atomic_read(&vm_committed_space);
-	allowed = ((totalram_pages - hugetlb_total_pages())
+	si_meminfo(&mi.si);
+	si_swapinfo(&mi.si);
+	mi.committed_space = atomic_read(&vm_committed_space);
+	mi.allowed = ((totalram_pages - hugetlb_total_pages())
 		* sysctl_overcommit_ratio / 100) + total_swap_pages;
+	mi.swapcache = total_swapcache_pages;
+	mi.cache = get_page_cache_size() - mi.swapcache - mi.si.bufferram;
 
-	vmtot = (VMALLOC_END-VMALLOC_START)>>10;
+	mi.vmalloc_total = (VMALLOC_END - VMALLOC_START) >> PAGE_SHIFT;
 	vmi = get_vmalloc_info();
-	vmi.used >>= 10;
-	vmi.largest_chunk >>= 10;
+	mi.vmalloc_used = vmi.used >> PAGE_SHIFT;
+	mi.vmalloc_largest = vmi.largest_chunk >> PAGE_SHIFT;
+
+#ifdef CONFIG_USER_RESOURCE
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi)
+			& NOTIFY_FAIL)
+		return -ENOMSG;
+#endif
 
 	/*
 	 * Tagged format, for easy grepping and expansion.
@@ -208,32 +239,36 @@ static int meminfo_read_proc(char *page,
 		"VmallocTotal: %8lu kB\n"
 		"VmallocUsed:  %8lu kB\n"
 		"VmallocChunk: %8lu kB\n",
-		K(i.totalram),
-		K(i.freeram),
-		K(i.bufferram),
-		K(get_page_cache_size()-total_swapcache_pages-i.bufferram),
-		K(total_swapcache_pages),
-		K(active),
-		K(inactive),
-		K(i.totalhigh),
-		K(i.freehigh),
-		K(i.totalram-i.totalhigh),
-		K(i.freeram-i.freehigh),
-		K(i.totalswap),
-		K(i.freeswap),
-		K(ps.nr_dirty),
-		K(ps.nr_writeback),
-		K(ps.nr_mapped),
-		K(ps.nr_slab),
-		K(allowed),
-		K(committed),
-		K(ps.nr_page_table_pages),
-		vmtot,
-		vmi.used,
-		vmi.largest_chunk
+		K(mi.si.totalram),
+		K(mi.si.freeram),
+		K(mi.si.bufferram),
+		K(mi.cache),
+		K(mi.swapcache),
+		K(mi.active),
+		K(mi.inactive),
+		K(mi.si.totalhigh),
+		K(mi.si.freehigh),
+		K(mi.si.totalram-mi.si.totalhigh),
+		K(mi.si.freeram-mi.si.freehigh),
+		K(mi.si.totalswap),
+		K(mi.si.freeswap),
+		K(mi.ps.nr_dirty),
+		K(mi.ps.nr_writeback),
+		K(mi.ps.nr_mapped),
+		K(mi.ps.nr_slab),
+		K(mi.allowed),
+		K(mi.committed_space),
+		K(mi.ps.nr_page_table_pages),
+		K(mi.vmalloc_total),
+		K(mi.vmalloc_used),
+		K(mi.vmalloc_largest)
 		);
 
+#ifdef CONFIG_HUGETLB_PAGE
+#warning Virtualize hugetlb_report_meminfo
+#else
 		len += hugetlb_report_meminfo(page + len);
+#endif
 
 	return proc_calc_metrics(page, start, off, count, eof, len);
 #undef K
@@ -258,8 +293,15 @@ static int version_read_proc(char *page,
 {
 	extern char *linux_banner;
 	int len;
+	struct new_utsname *utsname = &ve_utsname;
 
-	strcpy(page, linux_banner);
+	if (ve_is_super(get_exec_env()))
+		strcpy(page, linux_banner);
+	else
+		sprintf(page, "Linux version %s ("
+		      LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") ("
+		      LINUX_COMPILER ") %s\n",
+		      utsname->release, utsname->version);
 	len = strlen(page);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
@@ -307,6 +349,9 @@ static int devinfo_show(struct seq_file 
 
 static void *devinfo_start(struct seq_file *f, loff_t *pos)
 {
+	if (!ve_is_super(get_exec_env()))
+		return NULL;
+
 	if (*pos <= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
 	       return pos;
 	return NULL;
@@ -415,21 +460,14 @@ static struct file_operations proc_slabi
 	.release	= seq_release,
 };
 
-int show_stat(struct seq_file *p, void *v)
+static void show_stat_ve0(struct seq_file *p)
 {
-	int i;
-	extern unsigned long total_forks;
-	unsigned long jif;
-	u64	sum = 0, user = 0, nice = 0, system = 0,
-		idle = 0, iowait = 0, irq = 0, softirq = 0;
-
-	jif = - wall_to_monotonic.tv_sec;
-	if (wall_to_monotonic.tv_nsec)
-		--jif;
+	int i, j;
+	struct page_state page_state;
+	u64 sum, user, nice, system, idle, iowait, irq, softirq;
 
+	sum = user = nice = system = idle = iowait = irq = softirq = 0;
 	for_each_cpu(i) {
-		int j;
-
 		user += kstat_cpu(i).cpustat.user;
 		nice += kstat_cpu(i).cpustat.nice;
 		system += kstat_cpu(i).cpustat.system;
@@ -449,8 +487,8 @@ int show_stat(struct seq_file *p, void *
 		(unsigned long long)jiffies_64_to_clock_t(iowait),
 		(unsigned long long)jiffies_64_to_clock_t(irq),
 		(unsigned long long)jiffies_64_to_clock_t(softirq));
-	for_each_online_cpu(i) {
 
+	for_each_online_cpu(i) {
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
 		user = kstat_cpu(i).cpustat.user;
 		nice = kstat_cpu(i).cpustat.nice;
@@ -459,6 +497,7 @@ int show_stat(struct seq_file *p, void *
 		iowait = kstat_cpu(i).cpustat.iowait;
 		irq = kstat_cpu(i).cpustat.irq;
 		softirq = kstat_cpu(i).cpustat.softirq;
+
 		seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu\n",
 			i,
 			(unsigned long long)jiffies_64_to_clock_t(user),
@@ -475,6 +514,85 @@ int show_stat(struct seq_file *p, void *
 	for (i = 0; i < NR_IRQS; i++)
 		seq_printf(p, " %u", kstat_irqs(i));
 #endif
+	get_full_page_state(&page_state);
+	seq_printf(p, "\nswap %lu %lu",
+			page_state.pswpin, page_state.pswpout);
+}
+
+#ifdef CONFIG_VE
+static void show_stat_ve(struct seq_file *p, struct ve_struct *env)
+{
+	int i;
+	u64 user, nice, system;
+	cycles_t idle, iowait;
+	cpumask_t ve_cpus;
+
+	ve_cpu_online_map(env, &ve_cpus);
+
+	user = nice = system = idle = iowait = 0;
+	for_each_cpu_mask(i, ve_cpus) {
+		user += VE_CPU_STATS(env, i)->user;
+		nice += VE_CPU_STATS(env, i)->nice;
+		system += VE_CPU_STATS(env, i)->system;
+		idle += ve_sched_get_idle_time(env, i);
+		iowait += ve_sched_get_iowait_time(env, i);
+	}
+
+	seq_printf(p, "cpu  %llu %llu %llu %llu %llu 0 0\n",
+		(unsigned long long)jiffies_64_to_clock_t(user),
+		(unsigned long long)jiffies_64_to_clock_t(nice),
+		(unsigned long long)jiffies_64_to_clock_t(system),
+		(unsigned long long)cycles_to_clocks(idle),
+		(unsigned long long)cycles_to_clocks(iowait));
+
+	for_each_cpu_mask(i, ve_cpus) {
+		user = VE_CPU_STATS(env, i)->user;
+		nice = VE_CPU_STATS(env, i)->nice;
+		system = VE_CPU_STATS(env, i)->system;
+		idle = ve_sched_get_idle_time(env, i);
+		iowait = ve_sched_get_iowait_time(env, i);
+
+		seq_printf(p, "cpu%d %llu %llu %llu %llu %llu 0 0\n",
+			i,
+			(unsigned long long)jiffies_64_to_clock_t(user),
+			(unsigned long long)jiffies_64_to_clock_t(nice),
+			(unsigned long long)jiffies_64_to_clock_t(system),
+			(unsigned long long)cycles_to_clocks(idle),
+			(unsigned long long)cycles_to_clocks(iowait));
+	}
+	seq_printf(p, "intr 0");
+	seq_printf(p, "\nswap %d %d", 0, 0);
+}
+#endif
+
+int show_stat(struct seq_file *p, void *v)
+{
+	extern unsigned long total_forks;
+	unsigned long seq, jif;
+	struct ve_struct *env;
+	unsigned long __nr_running, __nr_iowait;
+ 
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		jif = - wall_to_monotonic.tv_sec;
+		if (wall_to_monotonic.tv_nsec)
+			--jif;
+	} while (read_seqretry(&xtime_lock, seq));
+
+	env = get_exec_env();
+	if (ve_is_super(env)) {
+		show_stat_ve0(p);
+		__nr_running = nr_running();
+		__nr_iowait = nr_iowait();
+	}
+#ifdef CONFIG_VE
+	else {
+		show_stat_ve(p, env);
+		__nr_running = nr_running_ve(env);
+		__nr_iowait = nr_iowait_ve();
+		jif += env->start_timespec.tv_sec;
+	}
+#endif
 
 	seq_printf(p,
 		"\nctxt %llu\n"
@@ -485,8 +603,8 @@ int show_stat(struct seq_file *p, void *
 		nr_context_switches(),
 		(unsigned long)jif,
 		total_forks,
-		nr_running(),
-		nr_iowait());
+		__nr_running,
+		__nr_iowait);
 
 	return 0;
 }
@@ -583,7 +701,8 @@ static int cmdline_read_proc(char *page,
 {
 	int len;
 
-	len = sprintf(page, "%s\n", saved_command_line);
+	len = sprintf(page, "%s\n",
+		ve_is_super(get_exec_env()) ? saved_command_line : "quiet");
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
diff -Nurap linux-2.6.9-100.orig/fs/proc/proc_tty.c linux-2.6.9-ve023stab054/fs/proc/proc_tty.c
--- linux-2.6.9-100.orig/fs/proc/proc_tty.c	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/proc/proc_tty.c	2011-06-15 19:26:19.000000000 +0400
@@ -6,6 +6,7 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/ve_owner.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/time.h>
@@ -108,24 +109,35 @@ static int show_tty_driver(struct seq_fi
 /* iterator */
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-	struct list_head *p;
+	struct tty_driver *drv;
+
 	loff_t l = *pos;
-	list_for_each(p, &tty_drivers)
+	read_lock(&tty_driver_guard);
+	list_for_each_entry(drv, &tty_drivers, tty_drivers) {
+		if (!ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env()))
+			continue;
 		if (!l--)
-			return list_entry(p, struct tty_driver, tty_drivers);
+			return drv;
+	}
 	return NULL;
 }
 
 static void *t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next;
+	struct tty_driver *drv;
+
 	(*pos)++;
-	return p==&tty_drivers ? NULL :
-			list_entry(p, struct tty_driver, tty_drivers);
+	drv = (struct tty_driver *)v;
+	list_for_each_entry_continue(drv, &tty_drivers, tty_drivers) {
+		if (ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env()))
+			return drv;
+	}
+	return NULL;
 }
 
 static void t_stop(struct seq_file *m, void *v)
 {
+	read_unlock(&tty_driver_guard);
 }
 
 static struct seq_operations tty_drivers_op = {
diff -Nurap linux-2.6.9-100.orig/fs/proc/root.c linux-2.6.9-ve023stab054/fs/proc/root.c
--- linux-2.6.9-100.orig/fs/proc/root.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/proc/root.c	2011-06-15 19:26:19.000000000 +0400
@@ -30,12 +30,14 @@ static struct super_block *proc_get_sb(s
 	return get_sb_single(fs_type, flags, data, proc_fill_super);
 }
 
-static struct file_system_type proc_fs_type = {
+struct file_system_type proc_fs_type = {
 	.name		= "proc",
 	.get_sb		= proc_get_sb,
 	.kill_sb	= kill_anon_super,
 };
 
+EXPORT_SYMBOL(proc_fs_type);
+
 extern int __init proc_init_inodecache(void);
 void __init proc_root_init(void)
 {
diff -Nurap linux-2.6.9-100.orig/fs/quota.c linux-2.6.9-ve023stab054/fs/quota.c
--- linux-2.6.9-100.orig/fs/quota.c	2004-10-19 01:55:28.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/quota.c	2011-06-15 19:26:20.000000000 +0400
@@ -94,26 +94,29 @@ static int check_quotactl_valid(struct s
 	if (cmd == Q_GETQUOTA || cmd == Q_XGETQUOTA) {
 		if (((type == USRQUOTA && current->euid != id) ||
 		     (type == GRPQUOTA && !in_egroup_p(id))) &&
-		    !capable(CAP_SYS_ADMIN))
+		    !capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 	}
 	else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO && cmd != Q_XGETQSTAT)
-		if (!capable(CAP_SYS_ADMIN))
+		if (!capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 
 	return security_quotactl (cmd, type, id, sb);
 }
 
-static struct super_block *get_super_to_sync(int type)
+void sync_dquots(struct super_block *sb, int type)
 {
-	struct list_head *head;
 	int cnt, dirty;
-
-restart:
+	
+	if (sb) {
+		if (sb->s_qcop && sb->s_qcop->quota_sync)
+			sb->s_qcop->quota_sync(sb, type);
+		return;
+	}
+	
 	spin_lock(&sb_lock);
-	list_for_each(head, &super_blocks) {
-		struct super_block *sb = list_entry(head, struct super_block, s_list);
-
+restart:		
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		/* This test just improves performance so it needn't be reliable... */
 		for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
 			if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
@@ -124,29 +127,14 @@ restart:
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
-		if (!sb->s_root) {
-			drop_super(sb);
+		if (sb->s_root && sb->s_qcop->quota_sync)
+			sb->s_qcop->quota_sync(sb, type);
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
 			goto restart;
-		}
-		return sb;
 	}
 	spin_unlock(&sb_lock);
-	return NULL;
-}
-
-void sync_dquots(struct super_block *sb, int type)
-{
-	if (sb) {
-		if (sb->s_qcop->quota_sync)
-			sb->s_qcop->quota_sync(sb, type);
-	}
-	else {
-		while ((sb = get_super_to_sync(type)) != 0) {
-			if (sb->s_qcop->quota_sync)
-				sb->s_qcop->quota_sync(sb, type);
-			drop_super(sb);
-		}
-	}
 }
 
 /* Copy parameters and call proper function */
@@ -258,6 +246,250 @@ static int do_quotactl(struct super_bloc
 	return 0;
 }
 
+static struct super_block *quota_get_sb(const char __user *special)
+{
+	struct super_block *sb;
+	struct block_device *bdev;
+	char *tmp;
+
+	tmp = getname(special);
+	if (IS_ERR(tmp))
+		return (struct super_block *)tmp;
+	bdev = lookup_bdev(tmp, FMODE_QUOTACTL);
+	putname(tmp);
+	if (IS_ERR(bdev))
+		return (struct super_block *)bdev;
+	sb = get_super(bdev);
+	bdput(bdev);
+	if (!sb)
+		return ERR_PTR(-ENODEV);
+	return sb;
+}
+
+#ifdef CONFIG_QUOTA_COMPAT
+
+#define QC_QUOTAON  0x0100	/* enable quotas */
+#define QC_QUOTAOFF 0x0200	/* disable quotas */
+/* GETQUOTA, SETQUOTA and SETUSE which were at 0x0300-0x0500 has now other parameteres */
+#define QC_SYNC     0x0600	/* sync disk copy of a filesystems quotas */
+#define QC_SETQLIM  0x0700	/* set limits */
+/* GETSTATS at 0x0800 is now longer... */
+#define QC_GETINFO  0x0900	/* get info about quotas - graces, flags... */
+#define QC_SETINFO  0x0A00	/* set info about quotas */
+#define QC_SETGRACE 0x0B00	/* set inode and block grace */
+#define QC_SETFLAGS 0x0C00	/* set flags for quota */
+#define QC_GETQUOTA 0x0D00	/* get limits and usage */
+#define QC_SETQUOTA 0x0E00	/* set limits and usage */
+#define QC_SETUSE   0x0F00	/* set usage */
+/* 0x1000 used by old RSQUASH */
+#define QC_GETSTATS 0x1100	/* get collected stats */
+#define QC_GETQUOTI 0x2B00	/* get limits and usage by index */
+
+struct compat_dqblk {
+	unsigned int dqb_ihardlimit;
+	unsigned int dqb_isoftlimit;
+	unsigned int dqb_curinodes;
+	unsigned int dqb_bhardlimit;
+	unsigned int dqb_bsoftlimit;
+	qsize_t dqb_curspace;
+	__kernel_time_t dqb_btime;
+	__kernel_time_t dqb_itime;
+};
+
+struct compat_dqinfo {
+	unsigned int dqi_bgrace;
+	unsigned int dqi_igrace;
+	unsigned int dqi_flags;
+	unsigned int dqi_blocks;
+	unsigned int dqi_free_blk;
+	unsigned int dqi_free_entry;
+};
+
+struct compat_dqstats {
+	__u32 lookups;
+	__u32 drops;
+	__u32 reads;
+	__u32 writes;
+	__u32 cache_hits;
+	__u32 allocated_dquots;
+	__u32 free_dquots;
+	__u32 syncs;
+	__u32 version;
+};
+
+asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr);
+static long compat_quotactl(unsigned int cmds, unsigned int type,
+		const char __user *special, qid_t id,
+		void __user *addr)
+{
+	struct super_block *sb;
+	long ret;
+
+	sb = NULL;
+	switch (cmds) {
+		case QC_QUOTAON:
+			return sys_quotactl(QCMD(Q_QUOTAON, type),
+					special, id, addr);
+
+		case QC_QUOTAOFF:
+			return sys_quotactl(QCMD(Q_QUOTAOFF, type),
+					special, id, addr);
+
+		case QC_SYNC:
+			return sys_quotactl(QCMD(Q_SYNC, type),
+					special, id, addr);
+
+		case QC_GETQUOTA: {
+			struct if_dqblk idq;
+			struct compat_dqblk cdq;
+
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			ret = check_quotactl_valid(sb, type, Q_GETQUOTA, id);
+			if (ret)
+				break;
+			ret = sb->s_qcop->get_dqblk(sb, type, id, &idq);
+			if (ret)
+				break;
+			cdq.dqb_ihardlimit = idq.dqb_ihardlimit;
+			cdq.dqb_isoftlimit = idq.dqb_isoftlimit;
+			cdq.dqb_curinodes = idq.dqb_curinodes;
+			cdq.dqb_bhardlimit = idq.dqb_bhardlimit;
+			cdq.dqb_bsoftlimit = idq.dqb_bsoftlimit;
+			cdq.dqb_curspace = idq.dqb_curspace;
+			cdq.dqb_btime = idq.dqb_btime;
+			cdq.dqb_itime = idq.dqb_itime;
+			ret = 0;
+			if (copy_to_user(addr, &cdq, sizeof(cdq)))
+				ret = -EFAULT;
+			break;
+		}
+
+		case QC_SETQUOTA:
+		case QC_SETUSE:
+		case QC_SETQLIM: {
+			struct if_dqblk idq;
+			struct compat_dqblk cdq;
+
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			ret = check_quotactl_valid(sb, type, Q_SETQUOTA, id);
+			if (ret)
+				break;
+			ret = -EFAULT;
+			if (copy_from_user(&cdq, addr, sizeof(cdq)))
+				break;
+			idq.dqb_ihardlimit = cdq.dqb_ihardlimit;
+			idq.dqb_isoftlimit = cdq.dqb_isoftlimit;
+			idq.dqb_curinodes = cdq.dqb_curinodes;
+			idq.dqb_bhardlimit = cdq.dqb_bhardlimit;
+			idq.dqb_bsoftlimit = cdq.dqb_bsoftlimit;
+			idq.dqb_curspace = cdq.dqb_curspace;
+			idq.dqb_valid = 0;
+			if (cmds == QC_SETQUOTA || cmds == QC_SETQLIM)
+				idq.dqb_valid |= QIF_LIMITS;
+			if (cmds == QC_SETQUOTA || cmds == QC_SETUSE)
+				idq.dqb_valid |= QIF_USAGE;
+			ret = sb->s_qcop->set_dqblk(sb, type, id, &idq);
+			break;
+		}
+
+		case QC_GETINFO: {
+			struct if_dqinfo iinf;
+			struct compat_dqinfo cinf;
+
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			ret = check_quotactl_valid(sb, type, Q_GETQUOTA, id);
+			if (ret)
+				break;
+			ret = sb->s_qcop->get_info(sb, type, &iinf);
+			if (ret)
+				break;
+			cinf.dqi_bgrace = iinf.dqi_bgrace;
+			cinf.dqi_igrace = iinf.dqi_igrace;
+			cinf.dqi_flags = 0;
+			if (iinf.dqi_flags & DQF_INFO_DIRTY)
+				cinf.dqi_flags |= 0x0010;
+			cinf.dqi_blocks = 0;
+			cinf.dqi_free_blk = 0;
+			cinf.dqi_free_entry = 0;
+			ret = 0;
+			if (copy_to_user(addr, &cinf, sizeof(cinf)))
+				ret = -EFAULT;
+			break;
+		}
+
+		case QC_SETINFO:
+		case QC_SETGRACE:
+		case QC_SETFLAGS: {
+			struct if_dqinfo iinf;
+			struct compat_dqinfo cinf;
+
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			ret = check_quotactl_valid(sb, type, Q_SETINFO, id);
+			if (ret)
+				break;
+			ret = -EFAULT;
+			if (copy_from_user(&cinf, addr, sizeof(cinf)))
+				break;
+			iinf.dqi_bgrace = cinf.dqi_bgrace;
+			iinf.dqi_igrace = cinf.dqi_igrace;
+			iinf.dqi_flags = cinf.dqi_flags;
+			iinf.dqi_valid = 0;
+			if (cmds == QC_SETINFO || cmds == QC_SETGRACE)
+				iinf.dqi_valid |= IIF_BGRACE | IIF_IGRACE;
+			if (cmds == QC_SETINFO || cmds == QC_SETFLAGS)
+				iinf.dqi_valid |= IIF_FLAGS;
+			ret = sb->s_qcop->set_info(sb, type, &iinf);
+			break;
+		}
+
+		case QC_GETSTATS: {
+			struct compat_dqstats stat;
+
+			memset(&stat, 0, sizeof(stat));
+			stat.version = 6*10000+5*100+0;
+			ret = 0;
+			if (copy_to_user(addr, &stat, sizeof(stat)))
+				ret = -EFAULT;
+			break;
+		}
+
+		case QC_GETQUOTI:
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			ret = check_quotactl_valid(sb, type, Q_GETINFO, id);
+			if (ret)
+				break;
+			ret = -ENOSYS;
+			if (!sb->s_qcop->get_quoti)
+				break;
+			ret = sb->s_qcop->get_quoti(sb, type, id, addr);
+			break;
+
+		default:
+			ret = -ENOSYS;
+			break;
+	}
+	if (sb && !IS_ERR(sb))
+		drop_super(sb);
+	return ret;
+}
+
+#endif
+
 /*
  * This is the system call interface. This communicates with
  * the user-level programs. Currently this only supports diskquota
@@ -268,25 +500,20 @@ asmlinkage long sys_quotactl(unsigned in
 {
 	uint cmds, type;
 	struct super_block *sb = NULL;
-	struct block_device *bdev;
-	char *tmp;
 	int ret;
 
 	cmds = cmd >> SUBCMDSHIFT;
 	type = cmd & SUBCMDMASK;
 
+#ifdef CONFIG_QUOTA_COMPAT
+	if (cmds >= 0x0100 && cmds < 0x3000)
+		return compat_quotactl(cmds, type, special, id, addr);
+#endif
+
 	if (cmds != Q_SYNC || special) {
-		tmp = getname(special);
-		if (IS_ERR(tmp))
-			return PTR_ERR(tmp);
-		bdev = lookup_bdev(tmp);
-		putname(tmp);
-		if (IS_ERR(bdev))
-			return PTR_ERR(bdev);
-		sb = get_super(bdev);
-		bdput(bdev);
-		if (!sb)
-			return -ENODEV;
+		sb = quota_get_sb(special);
+		if (IS_ERR(sb))
+			return PTR_ERR(sb);
 	}
 
 	ret = check_quotactl_valid(sb, type, cmds, id);
@@ -297,3 +524,97 @@ asmlinkage long sys_quotactl(unsigned in
 
 	return ret;
 }
+
+#ifdef CONFIG_QUOTA_COMPAT
+/*
+ * This code is used when _old_ 32-bit quota utilities
+ * run on a 64-bit platform.
+ */
+struct compat32_dqblk {
+	unsigned int dqb_ihardlimit;
+	unsigned int dqb_isoftlimit;
+	unsigned int dqb_curinodes;
+	unsigned int dqb_bhardlimit;
+	unsigned int dqb_bsoftlimit;
+	unsigned int dqb_curspace[2];
+	int dqb_btime;
+	int dqb_itime;
+};
+
+long sys32compat_quotactl(unsigned int cmd, const char __user *special,
+				 qid_t id, void __user *addr)
+{
+	uint cmds, type;
+	struct if_dqblk idq;
+	struct compat32_dqblk c32dq;
+	struct super_block *sb;
+	long ret;
+
+	cmds = cmd >> SUBCMDSHIFT;
+
+	if (cmds != QC_GETQUOTA  && cmds != QC_SETQUOTA &&
+		cmds != QC_SETUSE && cmds != QC_SETQLIM)
+		return sys_quotactl(cmd, special, id, addr);
+
+	sb = quota_get_sb(special);
+	ret = PTR_ERR(sb);
+	if (IS_ERR(sb))
+		goto out_sb;
+
+	type = cmd & SUBCMDMASK;
+
+	switch (cmds) {
+		case QC_GETQUOTA: {
+			ret = check_quotactl_valid(sb, type, Q_GETQUOTA, id);
+			if (ret)
+				break;
+			ret = sb->s_qcop->get_dqblk(sb, type, id, &idq);
+			if (ret)
+				break;
+			c32dq.dqb_ihardlimit = idq.dqb_ihardlimit;
+			c32dq.dqb_isoftlimit = idq.dqb_isoftlimit;
+			c32dq.dqb_curinodes = idq.dqb_curinodes;
+			c32dq.dqb_bhardlimit = idq.dqb_bhardlimit;
+			c32dq.dqb_bsoftlimit = idq.dqb_bsoftlimit;
+			memcpy(c32dq.dqb_curspace, &idq.dqb_curspace, 8);
+			c32dq.dqb_btime = idq.dqb_btime;
+			c32dq.dqb_itime = idq.dqb_itime;
+			if (copy_to_user(addr, &c32dq, sizeof(c32dq)))
+				ret = -EFAULT;
+			break;
+		}
+		case QC_SETQUOTA:
+		case QC_SETUSE:
+		case QC_SETQLIM: {
+			ret = check_quotactl_valid(sb, type, Q_SETQUOTA, id);
+			if (ret)
+				break;
+			ret = -EFAULT;
+			if (copy_from_user(&c32dq, addr, sizeof(c32dq)))
+				break;
+			idq.dqb_ihardlimit = c32dq.dqb_ihardlimit;
+			idq.dqb_isoftlimit = c32dq.dqb_isoftlimit;
+			idq.dqb_curinodes = c32dq.dqb_curinodes;
+			idq.dqb_bhardlimit = c32dq.dqb_bhardlimit;
+			idq.dqb_bsoftlimit = c32dq.dqb_bsoftlimit;
+			memcpy(&idq.dqb_curspace, c32dq.dqb_curspace, 8);
+			idq.dqb_valid = 0;
+			if (cmds == QC_SETQUOTA || cmds == QC_SETQLIM)
+				idq.dqb_valid |= QIF_LIMITS;
+			if (cmds == QC_SETQUOTA || cmds == QC_SETUSE)
+				idq.dqb_valid |= QIF_USAGE;
+			ret = sb->s_qcop->set_dqblk(sb, type, id, &idq);
+			break;
+		}
+	}
+	drop_super(sb);
+out_sb:
+	return ret;
+}
+#else
+long sys32compat_quotactl(unsigned int cmd, const char __user *special,
+				 qid_t id, void __user *addr)
+{
+	return sys_quotactl(cmd, special, id, addr);
+}
+#endif
diff -Nurap linux-2.6.9-100.orig/fs/reiserfs/file.c linux-2.6.9-ve023stab054/fs/reiserfs/file.c
--- linux-2.6.9-100.orig/fs/reiserfs/file.c	2004-10-19 01:53:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/reiserfs/file.c	2011-06-15 19:26:18.000000000 +0400
@@ -536,7 +536,7 @@ error_exit:
 
 /* Unlock pages prepared by reiserfs_prepare_file_region_for_write */
 void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */
-			      int num_pages /* amount of pages */) {
+			      size_t num_pages /* amount of pages */) {
     int i; // loop counter
 
     for (i=0; i < num_pages ; i++) {
@@ -567,7 +567,7 @@ int reiserfs_copy_from_user_to_file_regi
     int offset; // offset in page
 
     for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) {
-	int count = min_t(int,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page
+	size_t count = min_t(size_t,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page
 	struct page *page=prepared_pages[i]; // Current page we process.
 
 	fault_in_pages_readable( buf, count);
@@ -662,8 +662,8 @@ int reiserfs_submit_file_region_for_writ
 				struct reiserfs_transaction_handle *th,
 				struct inode *inode,
 				loff_t pos, /* Writing position offset */
-				int num_pages, /* Number of pages to write */
-				int write_bytes, /* number of bytes to write */
+				size_t num_pages, /* Number of pages to write */
+				size_t write_bytes, /* number of bytes to write */
 				struct page **prepared_pages /* list of pages */
 				)
 {
@@ -796,9 +796,9 @@ int reiserfs_check_for_tail_and_convert(
 int reiserfs_prepare_file_region_for_write(
 				struct inode *inode /* Inode of the file */,
 				loff_t pos, /* position in the file */
-				int num_pages, /* number of pages to
+				size_t num_pages, /* number of pages to
 					          prepare */
-				int write_bytes, /* Amount of bytes to be
+				size_t write_bytes, /* Amount of bytes to be
 						    overwritten from
 						    @pos */
 				struct page **prepared_pages /* pointer to array
@@ -1177,10 +1177,9 @@ ssize_t reiserfs_file_write( struct file
     while ( count > 0) {
 	/* This is the main loop in which we running until some error occures
 	   or until we write all of the data. */
-	int num_pages;/* amount of pages we are going to write this iteration */
-	int write_bytes; /* amount of bytes to write during this iteration */
-	int blocks_to_allocate; /* how much blocks we need to allocate for
-				   this iteration */
+	size_t num_pages;/* amount of pages we are going to write this iteration */
+	size_t write_bytes; /* amount of bytes to write during this iteration */
+	size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */
         
         /*  (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos*/
 	num_pages = !!((pos+count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial
@@ -1194,7 +1193,7 @@ ssize_t reiserfs_file_write( struct file
 	    /* If we were asked to write more data than we want to or if there
 	       is not that much space, then we shorten amount of data to write
 	       for this iteration. */
-	    num_pages = min_t(int, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb));
+	    num_pages = min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb));
 	    /* Also we should not forget to set size in bytes accordingly */
 	    write_bytes = (num_pages << PAGE_CACHE_SHIFT) - 
 			    (pos & (PAGE_CACHE_SIZE-1));
@@ -1220,7 +1219,7 @@ ssize_t reiserfs_file_write( struct file
 	    // But overwriting files on absolutelly full volumes would not
 	    // be very efficient. Well, people are not supposed to fill
 	    // 100% of disk space anyway.
-	    write_bytes = min_t(int, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1)));
+	    write_bytes = min_t(size_t, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1)));
 	    num_pages = 1;
 	    // No blocks were claimed before, so do it now.
 	    reiserfs_claim_blocks_to_be_allocated(inode->i_sb, 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits));
diff -Nurap linux-2.6.9-100.orig/fs/reiserfs/namei.c linux-2.6.9-ve023stab054/fs/reiserfs/namei.c
--- linux-2.6.9-100.orig/fs/reiserfs/namei.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/reiserfs/namei.c	2011-06-15 19:26:20.000000000 +0400
@@ -805,6 +805,9 @@ static int reiserfs_rmdir (struct inode 
     struct reiserfs_dir_entry de;
 
 
+    inode = dentry->d_inode;
+    DQUOT_INIT(inode);
+
     /* we will be doing 2 balancings and update 2 stat data */
     jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2;
 
@@ -820,8 +823,6 @@ static int reiserfs_rmdir (struct inode 
 	goto end_rmdir;
     }
 
-    inode = dentry->d_inode;
-
     reiserfs_update_inode_transaction(inode) ;
     reiserfs_update_inode_transaction(dir) ;
 
@@ -884,6 +885,7 @@ static int reiserfs_unlink (struct inode
     unsigned long savelink;
 
     inode = dentry->d_inode;
+    DQUOT_INIT(inode);
 
     /* in this transaction we can be doing at max two balancings and update
        two stat datas */
@@ -1154,6 +1156,8 @@ static int reiserfs_rename (struct inode
 
     old_inode = old_dentry->d_inode;
     new_dentry_inode = new_dentry->d_inode;
+    if (new_dentry_inode)
+	    DQUOT_INIT(new_dentry_inode);
 
     // make sure, that oldname still exists and points to an object we
     // are going to rename
diff -Nurap linux-2.6.9-100.orig/fs/reiserfs/xattr.c linux-2.6.9-ve023stab054/fs/reiserfs/xattr.c
--- linux-2.6.9-100.orig/fs/reiserfs/xattr.c	2004-10-19 01:55:28.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/reiserfs/xattr.c	2011-06-15 19:26:19.000000000 +0400
@@ -1442,9 +1442,26 @@ check_capabilities:
 }
 
 int
-reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd)
+reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *exec_perm)
 {
-    return __reiserfs_permission (inode, mask, nd, 1);
+	int ret;
+
+	if (exec_perm != NULL)
+		down(&inode->i_sem);
+
+	ret = __reiserfs_permission (inode, mask, nd, 1);
+
+	if (exec_perm != NULL) {
+		if (!ret) {
+			exec_perm->set = 1;
+			exec_perm->mode = inode->i_mode;
+			exec_perm->uid = inode->i_uid;
+			exec_perm->gid = inode->i_gid;
+		}
+		up(&inode->i_sem);
+	}
+	return ret;
 }
 
 int
diff -Nurap linux-2.6.9-100.orig/fs/select.c linux-2.6.9-ve023stab054/fs/select.c
--- linux-2.6.9-100.orig/fs/select.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/select.c	2011-06-15 19:26:19.000000000 +0400
@@ -25,6 +25,8 @@
 #include <asm/uaccess.h>
 #include <asm/div64.h>
 
+#include <ub/ub_mem.h>
+
 #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
 #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
 
@@ -95,7 +97,8 @@ void __pollwait(struct file *filp, wait_
 	if (!table || POLL_TABLE_FULL(table)) {
 		struct poll_table_page *new_table;
 
-		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
+		new_table = (struct poll_table_page *) __get_free_page(
+				GFP_KERNEL_UBC);
 		if (!new_table) {
 			p->error = -ENOMEM;
 			__set_current_state(TASK_RUNNING);
@@ -271,7 +274,11 @@ int do_select(int n, fd_set_bits *fds, l
 
 static void *select_bits_alloc(int size)
 {
-	return kmalloc(6 * size, GFP_KERNEL);
+	int flags;
+	flags = GFP_KERNEL;
+	if (size > PAGE_SIZE / 6)
+		flags |= __GFP_UBC;
+	return kmalloc(6 * size, flags);
 }
 
 static void select_bits_free(void *bits, int size)
@@ -462,6 +469,7 @@ asmlinkage long sys_poll(struct pollfd _
  	struct poll_list *walk;
 	int timeout_msecs;
 	int64_t lltimeout;
+	int flags;
 
 	/* Do a sanity check on nfds ... */
 	if (nfds > current->rlim[RLIMIT_NOFILE].rlim_cur)
@@ -488,12 +496,15 @@ asmlinkage long sys_poll(struct pollfd _
 	walk = NULL;
 	i = nfds;
 	err = -ENOMEM;
+	flags = GFP_KERNEL | __GFP_UBC;
 	while(i!=0) {
 		struct poll_list *pp;
+		if (i <= POLLFD_PER_PAGE)
+			flags &= ~__GFP_UBC;
 		pp = kmalloc(sizeof(struct poll_list)+
 				sizeof(struct pollfd)*
 				(i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),
-					GFP_KERNEL);
+					flags);
 		if(pp==NULL)
 			goto out_fds;
 		pp->next=NULL;
diff -Nurap linux-2.6.9-100.orig/fs/seq_file.c linux-2.6.9-ve023stab054/fs/seq_file.c
--- linux-2.6.9-100.orig/fs/seq_file.c	2004-10-19 01:54:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/seq_file.c	2011-06-15 19:26:19.000000000 +0400
@@ -311,6 +311,8 @@ int seq_path(struct seq_file *m,
 	if (m->count < m->size) {
 		char *s = m->buf + m->count;
 		char *p = d_path(dentry, mnt, s, m->size - m->count);
+		if (IS_ERR(p) && PTR_ERR(p) != -ENAMETOOLONG)
+			return 0;
 		if (!IS_ERR(p)) {
 			while (s <= p) {
 				char c = *p++;
diff -Nurap linux-2.6.9-100.orig/fs/simfs.c linux-2.6.9-ve023stab054/fs/simfs.c
--- linux-2.6.9-100.orig/fs/simfs.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/fs/simfs.c	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,373 @@
+/*
+ *  fs/simfs.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/namei.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/vzquota.h>
+#include <linux/statfs.h>
+#include <linux/virtinfo.h>
+#include <linux/faudit.h>
+#include <linux/genhd.h>
+#include <linux/reiserfs_fs.h>
+
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+
+#define SIMFS_GET_LOWER_FS_SB(sb) sb->s_root->d_sb
+
+static struct super_operations sim_super_ops;
+
+static int sim_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		struct kstat *stat)
+{
+	struct super_block *sb;
+	struct inode *inode;
+
+	inode = dentry->d_inode;
+	if (!inode->i_op->getattr) {
+		generic_fillattr(inode, stat);
+		if (!stat->blksize) {
+			unsigned blocks;
+
+			sb = inode->i_sb;
+			blocks = (stat->size + sb->s_blocksize-1) >>
+				sb->s_blocksize_bits;
+			stat->blocks = (sb->s_blocksize / 512) * blocks;
+			stat->blksize = sb->s_blocksize;
+		}
+	} else {
+		int err;
+
+		err = inode->i_op->getattr(mnt, dentry, stat);
+		if (err)
+			return err;
+	}
+
+	sb = mnt->mnt_sb;
+	if (sb->s_op == &sim_super_ops)
+		stat->dev = sb->s_dev;
+	return 0;
+}
+
+static int sim_getattr64(struct vfsmount *mnt, struct dentry *dentry,
+		struct kstat64 *stat)
+{
+	struct super_block *sb;
+	struct inode *inode;
+	int err;
+
+	err = 0;
+	inode = dentry->d_inode;
+
+	/* retrieve a 64-bit inode number if possible */
+	if (IS_INO64(inode)) {
+		struct inode_operations_ext *ixop =
+			(struct inode_operations_ext *) inode->i_op;
+
+		err = ixop->getattr64(mnt, dentry, stat);
+	} else if (!inode->i_op->getattr) {
+		generic_fillattr(inode, (struct kstat *)stat);
+		stat->ino64 = stat->ino;
+		if (!stat->blksize) {
+			unsigned blocks;
+
+			sb = inode->i_sb;
+			blocks = (stat->size + sb->s_blocksize-1) >>
+				sb->s_blocksize_bits;
+			stat->blocks = (sb->s_blocksize / 512) * blocks;
+			stat->blksize = sb->s_blocksize;
+		}
+	} else {
+		err = inode->i_op->getattr(mnt, dentry, (struct kstat *)stat);
+		if (!err)
+			stat->ino64 = stat->ino;
+	}
+
+	sb = mnt->mnt_sb;
+	if (sb->s_op == &sim_super_ops)
+		stat->dev = sb->s_dev;
+	return err;
+}
+
+static void quota_get_stat(struct super_block *sb, struct kstatfs *buf)
+{
+	int err;
+	struct dq_stat qstat;
+	struct virt_info_quota q;
+	long free_file, adj_file;
+	s64 blk, free_blk, adj_blk;
+	int bsize_bits;
+
+	q.super = sb;
+	q.qstat = &qstat;
+	err = virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_GETSTAT, &q);
+	if (err != NOTIFY_OK)
+		return;
+
+	bsize_bits = ffs(buf->f_bsize) - 1;
+	free_blk = (s64)(qstat.bsoftlimit - qstat.bcurrent) >> bsize_bits;
+	if (free_blk < 0)
+		free_blk = 0;
+	/*
+	 * In the regular case, we always set buf->f_bfree and buf->f_blocks to
+	 * the values reported by quota.  In case of real disk space shortage,
+	 * we adjust the values.  We want this adjustment to look as if the
+	 * total disk space were reduced, not as if the usage were increased.
+	 *    -- SAW
+	 */
+	adj_blk = 0;
+	if (buf->f_bfree < free_blk)
+		adj_blk = free_blk - buf->f_bfree;
+	buf->f_bfree = (long)(free_blk - adj_blk);
+
+	if (free_blk < buf->f_bavail)
+		buf->f_bavail = (long)free_blk; /* min(f_bavail, free_blk) */
+
+	blk = (qstat.bsoftlimit >> bsize_bits) - adj_blk;
+	buf->f_blocks = blk > LONG_MAX ? LONG_MAX : blk;
+
+	free_file = qstat.isoftlimit - qstat.icurrent;
+	if (free_file < 0)
+		free_file = 0;
+	if (buf->f_type == REISERFS_SUPER_MAGIC)
+		/*
+		 * reiserfs doesn't initialize f_ffree and f_files values of
+		 * kstatfs because it doesn't have an inode limit.
+		 */
+		buf->f_ffree = free_file;
+	adj_file = 0;
+	if (buf->f_ffree < free_file)
+		adj_file = free_file - buf->f_ffree;
+	buf->f_ffree = free_file - adj_file;
+	buf->f_files = qstat.isoftlimit - adj_file;
+}
+
+static int sim_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	int err;
+	struct super_block *lsb;
+	struct kstatfs statbuf;
+
+	err = 0;
+	if (sb->s_op != &sim_super_ops)
+		return 0;
+
+	memset(&statbuf, 0, sizeof(statbuf));
+	lsb = SIMFS_GET_LOWER_FS_SB(sb);
+
+	err = -ENOSYS;
+	if (lsb && lsb->s_op && lsb->s_op->statfs)
+		err = lsb->s_op->statfs(lsb, &statbuf);
+	if (err)
+		return err;
+
+	quota_get_stat(sb, &statbuf);
+	buf->f_files    = statbuf.f_files;
+	buf->f_ffree    = statbuf.f_ffree;
+	buf->f_blocks   = statbuf.f_blocks;
+	buf->f_bfree    = statbuf.f_bfree;
+	buf->f_bavail   = statbuf.f_bavail;
+	return 0;
+}
+
+static int sim_systemcall(struct vnotifier_block *me, unsigned long n,
+		void *d, int old_ret)
+{
+	int err;
+
+	switch (n) {
+	case VIRTINFO_FAUDIT_STAT: {
+		struct faudit_stat_arg *arg;
+
+		arg = (struct faudit_stat_arg *)d;
+		err = sim_getattr(arg->mnt, arg->dentry, arg->stat);
+		arg->err = err;
+		}
+		break;
+	case VIRTINFO_FAUDIT_STAT64: {
+		struct faudit_stat64_arg *arg;
+
+		arg = (struct faudit_stat64_arg *)d;
+		err = sim_getattr64(arg->mnt, arg->dentry, arg->stat);
+		arg->err = err;
+		}
+		break;
+	case VIRTINFO_FAUDIT_STATFS: {
+		struct faudit_statfs_arg *arg;
+
+		arg = (struct faudit_statfs_arg *)d;
+		err = sim_statfs(arg->sb, arg->stat);
+		arg->err = err;
+		}
+		break;
+	default:
+		return old_ret;
+	}
+	return (err ? NOTIFY_BAD : NOTIFY_OK);
+}
+
+static struct inode *sim_quota_root(struct super_block *sb)
+{
+	return sb->s_root->d_inode;
+}
+
+/*
+ * NOTE: We need to set up the s_bdev field on the super block, since
+ * sys_quotactl() does lookup_bdev() and get_super(), which compare sb->s_bdev.
+ * So this is a MUST if we want an unmodified sys_quotactl
+ * to work correctly on /dev/simfs inside a VE.
+ */
+static int sim_init_blkdev(struct super_block *sb)
+{
+	static struct hd_struct fake_hd;
+	struct block_device *blkdev;
+
+	blkdev = bdget(sb->s_dev);
+	if (blkdev == NULL)
+		return -ENOMEM;
+
+	blkdev->bd_part = &fake_hd;	/* required for bdev_read_only() */
+	sb->s_bdev = blkdev;
+
+	return 0;
+}
+
+static void sim_free_blkdev(struct super_block *sb)
+{
+	/* set bd_part back to NULL */
+	sb->s_bdev->bd_part = NULL;
+	bdput(sb->s_bdev);
+}
+
+static void sim_quota_init(struct super_block *sb)
+{
+	struct virt_info_quota viq;
+
+	viq.super = sb;
+	virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_ON, &viq);
+}
+
+static void sim_quota_free(struct super_block *sb)
+{
+	struct virt_info_quota viq;
+
+	viq.super = sb;
+	virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_OFF, &viq);
+}
+
+void sim_put_super(struct super_block *sb)
+{
+	mntput((struct vfsmount *)(sb->s_fs_info));
+
+	sim_quota_free(sb);
+	sim_free_blkdev(sb);
+}
+
+static struct super_operations sim_super_ops = {
+	.get_quota_root	= sim_quota_root,
+	.put_super = sim_put_super,
+};
+
+static int sim_fill_super(struct super_block *s, void *data)
+{
+	int err;
+	struct nameidata *nd;
+
+	err = set_anon_super(s, NULL);
+	if (err)
+		goto out;
+
+	err = 0;
+	nd = (struct nameidata *)data;
+	s->s_fs_info = mntget(nd->mnt);
+	s->s_root = dget(nd->dentry);
+	s->s_op = &sim_super_ops;
+out:
+	return err;
+}
+
+struct super_block *sim_get_sb(struct file_system_type *type,
+		int flags, const char *dev_name, void *opt)
+{
+	int err;
+	struct nameidata nd;
+	struct super_block *sb;
+
+	sb = ERR_PTR(-EINVAL);
+	if (opt == NULL)
+		goto out;
+
+	err = path_lookup(opt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
+	sb = ERR_PTR(err);
+	if (err)
+		goto out;
+
+	sb = sget(type, NULL, sim_fill_super, &nd);
+	if (IS_ERR(sb))
+		goto out_path;
+
+	err = sim_init_blkdev(sb);
+	if (err)
+		goto out_killsb;
+
+	sim_quota_init(sb);
+out_path:
+	path_release(&nd);
+out:
+	return sb;
+
+out_killsb:
+	up_write(&sb->s_umount);
+	deactivate_super(sb);
+	sb = ERR_PTR(-ENODEV);
+	goto out_path;
+}
+
+static struct file_system_type sim_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "simfs",
+	.get_sb		= sim_get_sb,
+	.kill_sb	= kill_anon_super,
+};
+
+static struct vnotifier_block sim_syscalls = {
+	.notifier_call = sim_systemcall,
+};
+
+static int __init init_simfs(void)
+{
+	int err;
+
+	err = register_filesystem(&sim_fs_type);
+	if (err)
+		return err;
+
+	virtinfo_notifier_register(VITYPE_FAUDIT, &sim_syscalls);
+	return 0;
+}
+
+static void __exit exit_simfs(void)
+{
+	virtinfo_notifier_unregister(VITYPE_FAUDIT, &sim_syscalls);
+	unregister_filesystem(&sim_fs_type);
+}
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Open Virtuozzo Simulation of File System");
+MODULE_LICENSE("GPL v2");
+
+module_init(init_simfs);
+module_exit(exit_simfs);
diff -Nurap linux-2.6.9-100.orig/fs/smbfs/file.c linux-2.6.9-ve023stab054/fs/smbfs/file.c
--- linux-2.6.9-100.orig/fs/smbfs/file.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/smbfs/file.c	2011-06-15 19:26:19.000000000 +0400
@@ -388,7 +388,8 @@ smb_file_release(struct inode *inode, st
  * privileges, so we need our own check for this.
  */
 static int
-smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
+smb_file_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *exec_perm)
 {
 	int mode = inode->i_mode;
 	int error = 0;
diff -Nurap linux-2.6.9-100.orig/fs/smbfs/inode.c linux-2.6.9-ve023stab054/fs/smbfs/inode.c
--- linux-2.6.9-100.orig/fs/smbfs/inode.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/smbfs/inode.c	2011-06-15 19:26:19.000000000 +0400
@@ -233,7 +233,7 @@ smb_invalidate_inodes(struct smb_sb_info
 {
 	VERBOSE("\n");
 	shrink_dcache_sb(SB_of(server));
-	invalidate_inodes(SB_of(server));
+	invalidate_inodes(SB_of(server), 0);
 }
 
 /*
diff -Nurap linux-2.6.9-100.orig/fs/smbfs/sock.c linux-2.6.9-ve023stab054/fs/smbfs/sock.c
--- linux-2.6.9-100.orig/fs/smbfs/sock.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/smbfs/sock.c	2011-06-15 19:26:20.000000000 +0400
@@ -100,6 +100,7 @@ smb_close_socket(struct smb_sb_info *ser
 
 		VERBOSE("closing socket %p\n", sock);
 		sock->sk->sk_data_ready = server->data_ready;
+		sock->sk->sk_user_data = NULL;
 		server->sock_file = NULL;
 		fput(file);
 	}
diff -Nurap linux-2.6.9-100.orig/fs/stat.c linux-2.6.9-ve023stab054/fs/stat.c
--- linux-2.6.9-100.orig/fs/stat.c	2011-06-09 19:22:48.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/stat.c	2011-06-15 19:26:20.000000000 +0400
@@ -14,6 +14,7 @@
 #include <linux/fs.h>
 #include <linux/namei.h>
 #include <linux/security.h>
+#include <linux/faudit.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -41,11 +42,19 @@ int vfs_getattr(struct vfsmount *mnt, st
 {
 	struct inode *inode = dentry->d_inode;
 	int retval;
+	struct faudit_stat_arg arg;
 
 	retval = security_inode_getattr(mnt, dentry);
 	if (retval)
 		return retval;
 
+	arg.mnt = mnt;
+	arg.dentry = dentry;
+	arg.stat = stat;
+	if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STAT, &arg)
+			!= NOTIFY_DONE)
+		return arg.err;
+
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(mnt, dentry, stat);
 
@@ -66,11 +75,19 @@ int vfs_getattr64(struct vfsmount *mnt, 
 {
 	struct inode *inode = dentry->d_inode;
 	int retval;
+	struct faudit_stat64_arg arg;
 
 	retval = security_inode_getattr(mnt, dentry);
 	if (retval)
 		return retval;
 
+	arg.mnt = mnt;
+	arg.dentry = dentry;
+	arg.stat = stat;
+	if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STAT64, &arg)
+			!= NOTIFY_DONE)
+		return arg.err;
+
 	/* retrieve a 64-bit inode number if possible */
 	if (IS_INO64(inode)) {
 		struct inode_operations_ext *ixop =
diff -Nurap linux-2.6.9-100.orig/fs/super.c linux-2.6.9-ve023stab054/fs/super.c
--- linux-2.6.9-100.orig/fs/super.c	2011-06-09 19:22:48.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/super.c	2011-06-15 19:26:22.000000000 +0400
@@ -23,6 +23,7 @@
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/ve_owner.h>
 #include <linux/init.h>
 #include <linux/smp_lock.h>
 #include <linux/acct.h>
@@ -35,6 +36,7 @@
 #include <linux/vfs.h>
 #include <linux/writeback.h>		/* for the emergency remount stuff */
 #include <linux/idr.h>
+#include <linux/kobject.h>
 #include <asm/uaccess.h>
 
 
@@ -65,8 +67,10 @@ static struct super_block *alloc_super(v
 		}
 		INIT_LIST_HEAD(&s->s_dirty);
 		INIT_LIST_HEAD(&s->s_io);
+		INIT_LIST_HEAD(&s->s_inodes);
 		INIT_LIST_HEAD(&s->s_files);
 		INIT_LIST_HEAD(&s->s_instances);
+		INIT_LIST_HEAD(&s->s_dentry_unused);
 		INIT_HLIST_HEAD(&s->s_anon);
 		init_rwsem(&s->s_umount);
 		sema_init(&s->s_lock, 1);
@@ -233,7 +237,7 @@ void generic_shutdown_super(struct super
 		lock_kernel();
 		sb->s_flags &= ~MS_ACTIVE;
 		/* bad name - it should be evict_inodes() */
-		invalidate_inodes(sb);
+		invalidate_inodes(sb, 0);
 
 		if (sop->write_super && sb->s_dirt)
 			sop->write_super(sb);
@@ -241,10 +245,9 @@ void generic_shutdown_super(struct super
 			sop->put_super(sb);
 
 		/* Forget any remaining inodes */
-		if (invalidate_inodes(sb)) {
-			printk("VFS: Busy inodes after unmount. "
-			   "Self-destruct in 5 seconds.  Have a nice day...\n");
-		}
+		if (invalidate_inodes(sb, 1))
+			printk("Self-destruct in 5 seconds. "
+				"Have a nice day...\n");
 
 		unlock_kernel();
 		unlock_super(sb);
@@ -337,20 +340,22 @@ static inline void write_super(struct su
  */
 void sync_supers(void)
 {
-	struct super_block * sb;
-restart:
+	struct super_block *sb;
+	
 	spin_lock(&sb_lock);
-	sb = sb_entry(super_blocks.next);
-	while (sb != sb_entry(&super_blocks))
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (sb->s_dirt) {
 			sb->s_count++;
 			spin_unlock(&sb_lock);
 			down_read(&sb->s_umount);
 			write_super(sb);
-			drop_super(sb);
-			goto restart;
-		} else
-			sb = sb_entry(sb->s_list.next);
+			up_read(&sb->s_umount);
+			spin_lock(&sb_lock);
+			if (__put_super_and_need_restart(sb)) 
+				goto restart;
+		}
+	}
 	spin_unlock(&sb_lock);
 }
 
@@ -377,20 +382,16 @@ void sync_filesystems(int wait)
 
 	down(&mutex);		/* Could be down_interruptible */
 	spin_lock(&sb_lock);
-	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
-			sb = sb_entry(sb->s_list.next)) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (!sb->s_op->sync_fs)
 			continue;
 		if (sb->s_flags & MS_RDONLY)
 			continue;
 		sb->s_need_sync_fs = 1;
 	}
-	spin_unlock(&sb_lock);
 
 restart:
-	spin_lock(&sb_lock);
-	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
-			sb = sb_entry(sb->s_list.next)) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (!sb->s_need_sync_fs)
 			continue;
 		sb->s_need_sync_fs = 0;
@@ -401,8 +402,11 @@ restart:
 		down_read(&sb->s_umount);
 		if (sb->s_root && (wait || sb->s_dirt))
 			sb->s_op->sync_fs(sb, wait);
-		drop_super(sb);
-		goto restart;
+		up_read(&sb->s_umount);
+		/* restart only when sb is no longer on the list */
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
 	}
 	spin_unlock(&sb_lock);
 	up(&mutex);
@@ -418,20 +422,20 @@ restart:
 
 struct super_block * get_super(struct block_device *bdev)
 {
-	struct list_head *p;
+	struct super_block *sb;
+
 	if (!bdev)
 		return NULL;
 rescan:
 	spin_lock(&sb_lock);
-	list_for_each(p, &super_blocks) {
-		struct super_block *s = sb_entry(p);
-		if (s->s_bdev == bdev) {
-			s->s_count++;
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_bdev == bdev) {
+			sb->s_count++;
 			spin_unlock(&sb_lock);
-			down_read(&s->s_umount);
-			if (s->s_root)
-				return s;
-			drop_super(s);
+			down_read(&sb->s_umount);
+			if (sb->s_root)
+				return sb;
+			drop_super(sb);
 			goto rescan;
 		}
 	}
@@ -443,19 +447,18 @@ EXPORT_SYMBOL(get_super);
  
 struct super_block * user_get_super(dev_t dev)
 {
-	struct list_head *p;
+	struct super_block *sb;
 
 rescan:
 	spin_lock(&sb_lock);
-	list_for_each(p, &super_blocks) {
-		struct super_block *s = sb_entry(p);
-		if (s->s_dev ==  dev) {
-			s->s_count++;
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_dev ==  dev) {
+			sb->s_count++;
 			spin_unlock(&sb_lock);
-			down_read(&s->s_umount);
-			if (s->s_root)
-				return s;
-			drop_super(s);
+			down_read(&sb->s_umount);
+			if (sb->s_root)
+				return sb;
+			drop_super(sb);
 			goto rescan;
 		}
 	}
@@ -470,11 +473,20 @@ asmlinkage long sys_ustat(unsigned dev, 
         struct super_block *s;
         struct ustat tmp;
         struct kstatfs sbuf;
-	int err = -EINVAL;
+	dev_t kdev;
+	int err;
 
-        s = user_get_super(new_decode_dev(dev));
-        if (s == NULL)
-                goto out;
+	kdev = new_decode_dev(dev);
+#ifdef CONFIG_VE
+	err = get_device_perms_ve(S_IFBLK, kdev, FMODE_READ);
+	if (err)
+		goto out;
+#endif
+
+	err = -EINVAL;
+	s = user_get_super(kdev);
+	if (s == NULL)
+		goto out;
 	err = vfs_statfs(s, &sbuf);
 	drop_super(s);
 	if (err)
@@ -588,6 +600,13 @@ void emergency_remount(void)
 static struct idr unnamed_dev_idr;
 static spinlock_t unnamed_dev_lock = SPIN_LOCK_UNLOCKED;/* protects the above */
 
+/* for compatibility with coreutils still unaware of new minor sizes */
+int unnamed_dev_majors[] = {
+	0, 144, 145, 146, 242, 243, 244, 245,
+	246, 247, 248, 249, 250, 251, 252, 253
+};
+EXPORT_SYMBOL(unnamed_dev_majors);
+
 int set_anon_super(struct super_block *s, void *data)
 {
 	int dev;
@@ -605,13 +624,13 @@ int set_anon_super(struct super_block *s
 	else if (error)
 		return -EAGAIN;
 
-	if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
+	if ((dev & MAX_ID_MASK) >= (1 << MINORBITS)) {
 		spin_lock(&unnamed_dev_lock);
 		idr_remove(&unnamed_dev_idr, dev);
 		spin_unlock(&unnamed_dev_lock);
 		return -EMFILE;
 	}
-	s->s_dev = MKDEV(0, dev & MINORMASK);
+	s->s_dev = make_unnamed_dev(dev);
 	return 0;
 }
 
@@ -619,8 +638,9 @@ EXPORT_SYMBOL(set_anon_super);
 
 void kill_anon_super(struct super_block *sb)
 {
-	int slot = MINOR(sb->s_dev);
+	int slot;
 
+	slot = unnamed_dev_idx(sb->s_dev);
 	generic_shutdown_super(sb);
 	spin_lock(&unnamed_dev_lock);
 	idr_remove(&unnamed_dev_idr, slot);
@@ -655,6 +675,16 @@ static int test_bdev_super(struct super_
 	return (void *)s->s_bdev == data;
 }
 
+static void bdev_uevent(struct block_device *bdev, enum kobject_action action)
+{
+	if (bdev->bd_disk) {
+		if (bdev->bd_part)
+			kobject_uevent(&bdev->bd_part->kobj, action, NULL);
+		else
+			kobject_uevent(&bdev->bd_disk->kobj, action, NULL);
+	}
+}
+
 struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
 	int (*fill_super)(struct super_block *, void *, int))
@@ -697,8 +727,10 @@ struct super_block *get_sb_bdev(struct f
 			up_write(&s->s_umount);
 			deactivate_super(s);
 			s = ERR_PTR(error);
-		} else
+		} else {
 			s->s_flags |= MS_ACTIVE;
+			bdev_uevent(bdev, KOBJ_MOUNT);
+		}
 	}
 
 	return s;
@@ -713,6 +745,8 @@ EXPORT_SYMBOL(get_sb_bdev);
 void kill_block_super(struct super_block *sb)
 {
 	struct block_device *bdev = sb->s_bdev;
+
+	bdev_uevent(bdev, KOBJ_UMOUNT);
 	generic_shutdown_super(sb);
 	sync_blockdev(bdev);
 	close_bdev_excl(bdev);
@@ -776,17 +810,14 @@ struct super_block *get_sb_single(struct
 EXPORT_SYMBOL(get_sb_single);
 
 struct vfsmount *
-do_kern_mount(const char *fstype, int flags, const char *name, void *data)
+do_kern_mount(struct file_system_type *type, int flags,
+			const char *name, void *data)
 {
-	struct file_system_type *type = get_fs_type(fstype);
 	struct super_block *sb = ERR_PTR(-ENOMEM);
 	struct vfsmount *mnt;
 	int error;
 	char *secdata = NULL;
 
-	if (!type)
-		return ERR_PTR(-ENODEV);
-
 	mnt = alloc_vfsmnt(name);
 	if (!mnt)
 		goto out;
@@ -818,7 +849,6 @@ do_kern_mount(const char *fstype, int fl
 	mnt->mnt_namespace = current->namespace;
 	up_write(&sb->s_umount);
 	free_secdata(secdata);
-	put_filesystem(type);
 	return mnt;
 out_sb:
 	up_write(&sb->s_umount);
@@ -829,7 +859,6 @@ out_free_secdata:
 out_mnt:
 	free_vfsmnt(mnt);
 out:
-	put_filesystem(type);
 	return (struct vfsmount *)sb;
 }
 
@@ -837,7 +866,7 @@ EXPORT_SYMBOL_GPL(do_kern_mount);
 
 struct vfsmount *kern_mount(struct file_system_type *type)
 {
-	return do_kern_mount(type->name, 0, type->name, NULL);
+	return do_kern_mount(type, 0, type->name, NULL);
 }
 
 EXPORT_SYMBOL(kern_mount);
diff -Nurap linux-2.6.9-100.orig/fs/sysfs/bin.c linux-2.6.9-ve023stab054/fs/sysfs/bin.c
--- linux-2.6.9-100.orig/fs/sysfs/bin.c	2011-06-09 19:22:45.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/sysfs/bin.c	2011-06-15 19:26:19.000000000 +0400
@@ -158,6 +158,11 @@ struct file_operations bin_fops = {
 
 int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
+#ifdef CONFIG_VE
+	if (!get_exec_env()->sysfs_sb)
+		return 0;
+#endif
+
 	BUG_ON(!kobj || !kobj->dentry || !attr);
 
 	return sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
@@ -173,6 +178,10 @@ int sysfs_create_bin_file(struct kobject
 
 int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
+#ifdef CONFIG_VE
+	if (!get_exec_env()->sysfs_sb)
+		return 0;
+#endif
 	sysfs_hash_and_remove(kobj->dentry,attr->attr.name);
 	return 0;
 }
diff -Nurap linux-2.6.9-100.orig/fs/sysfs/dir.c linux-2.6.9-ve023stab054/fs/sysfs/dir.c
--- linux-2.6.9-100.orig/fs/sysfs/dir.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/sysfs/dir.c	2011-06-15 19:26:19.000000000 +0400
@@ -194,12 +194,16 @@ int sysfs_create_dir(struct kobject * ko
 	struct dentry * parent;
 	int error = 0;
 
+#ifdef CONFIG_VE
+	if (!get_exec_env()->sysfs_sb)
+		return 0;
+#endif
 	BUG_ON(!kobj);
 
 	if (kobj->parent)
 		parent = kobj->parent->dentry;
-	else if (sysfs_mount && sysfs_mount->mnt_sb)
-		parent = sysfs_mount->mnt_sb->s_root;
+	else if (visible_sysfs_mount && visible_sysfs_mount->mnt_sb)
+		parent = visible_sysfs_mount->mnt_sb->s_root;
 	else
 		return -EFAULT;
 
@@ -332,11 +336,16 @@ void sysfs_remove_subdir(struct dentry *
 
 void sysfs_remove_dir(struct kobject * kobj)
 {
-	struct dentry * dentry = dget(kobj->dentry);
+	struct dentry * dentry;
 	struct sysfs_dirent * parent_sd;
 	struct sysfs_dirent * sd, * tmp;
 
-	if (!dentry)
+#ifdef CONFIG_VE
+	if (!get_exec_env()->sysfs_sb)
+		return;
+#endif
+	dentry = dget(kobj->dentry);
+	if (!dentry)
 		return;
 
 	pr_debug("sysfs %s: removing dir\n",dentry->d_name.name);
@@ -363,6 +372,10 @@ int sysfs_rename_dir(struct kobject * ko
 	int error = 0;
 	struct dentry * new_dentry, * parent;
 
+#ifdef CONFIG_VE
+	if (!get_exec_env()->sysfs_sb)
+		return 0;
+#endif
 	if (!strcmp(kobject_name(kobj), new_name))
 		return -EINVAL;
 
diff -Nurap linux-2.6.9-100.orig/fs/sysfs/file.c linux-2.6.9-ve023stab054/fs/sysfs/file.c
--- linux-2.6.9-100.orig/fs/sysfs/file.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/sysfs/file.c	2011-06-15 19:26:19.000000000 +0400
@@ -380,6 +380,10 @@ int sysfs_add_file(struct dentry * dir, 
 
 int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
 {
+#ifdef CONFIG_VE
+	if (!get_exec_env()->sysfs_sb)
+		return 0;
+#endif
 	BUG_ON(!kobj || !kobj->dentry || !attr);
 
 	return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR);
@@ -401,6 +405,10 @@ int sysfs_update_file(struct kobject * k
 	struct dentry * victim;
 	int res = -ENOENT;
 
+#ifdef CONFIG_VE
+	if (!get_exec_env()->sysfs_sb)
+		return 0;
+#endif
 	down(&dir->d_inode->i_sem);
 	victim = sysfs_get_dentry(dir, attr->name);
 	if (!IS_ERR(victim)) {
@@ -433,6 +441,10 @@ int sysfs_update_file(struct kobject * k
 
 void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 {
+#ifdef CONFIG_VE
+	if (!get_exec_env()->sysfs_sb)
+		return;
+#endif
 	sysfs_hash_and_remove(kobj->dentry,attr->name);
 }
 
diff -Nurap linux-2.6.9-100.orig/fs/sysfs/group.c linux-2.6.9-ve023stab054/fs/sysfs/group.c
--- linux-2.6.9-100.orig/fs/sysfs/group.c	2011-06-09 19:22:45.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/sysfs/group.c	2011-06-15 19:26:19.000000000 +0400
@@ -45,6 +45,10 @@ int sysfs_create_group(struct kobject * 
 	struct dentry * dir;
 	int error;
 
+#ifdef CONFIG_VE
+	if (!get_exec_env()->sysfs_sb)
+		return 0;
+#endif
 	BUG_ON(!kobj || !kobj->dentry);
 
 	if (grp->name) {
@@ -67,6 +71,10 @@ void sysfs_remove_group(struct kobject *
 {
 	struct dentry * dir;
 
+#ifdef CONFIG_VE
+	if (!get_exec_env()->sysfs_sb)
+		return;
+#endif
 	if (grp->name)
 		dir = sysfs_get_dentry(kobj->dentry,grp->name);
 	else
diff -Nurap linux-2.6.9-100.orig/fs/sysfs/inode.c linux-2.6.9-ve023stab054/fs/sysfs/inode.c
--- linux-2.6.9-100.orig/fs/sysfs/inode.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/sysfs/inode.c	2011-06-15 19:26:19.000000000 +0400
@@ -8,12 +8,19 @@
 
 #undef DEBUG 
 
+#include <linux/config.h>
 #include <linux/pagemap.h>
 #include <linux/namei.h>
 #include <linux/backing-dev.h>
 #include "sysfs.h"
 
-extern struct super_block * sysfs_sb;
+
+#ifndef CONFIG_VE
+extern struct super_block *sysfs_sb;
+#define visible_sysfs_sb sysfs_sb
+#else
+#define visible_sysfs_sb  (get_exec_env()->sysfs_sb)
+#endif
 
 static struct address_space_operations sysfs_aops = {
 	.readpage	= simple_readpage,
@@ -28,7 +35,7 @@ static struct backing_dev_info sysfs_bac
 
 struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *sd)
 {
-	struct inode * inode = new_inode(sysfs_sb);
+	struct inode * inode = new_inode(visible_sysfs_sb);
 	if (inode) {
 		inode->i_mode = mode;
 		inode->i_uid = 0;
@@ -144,12 +151,15 @@ void sysfs_drop_dentry(struct sysfs_dire
 
 	if (dentry) {
 		spin_lock(&dcache_lock);
+		spin_lock(&dentry->d_lock);
 		if (!d_unhashed(dentry) && dentry->d_inode) {
 			dget_locked(dentry);
 			__d_drop(dentry);
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 			simple_unlink(parent->d_inode, dentry);
 		} else {
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 		}
 
diff -Nurap linux-2.6.9-100.orig/fs/sysfs/mount.c linux-2.6.9-ve023stab054/fs/sysfs/mount.c
--- linux-2.6.9-100.orig/fs/sysfs/mount.c	2011-06-09 19:22:45.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/sysfs/mount.c	2011-06-15 19:26:19.000000000 +0400
@@ -7,6 +7,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/pagemap.h>
+#include <linux/module.h>
 #include <linux/init.h>
 
 #include "sysfs.h"
@@ -17,6 +18,15 @@
 struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
 
+void prepare_sysfs(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->sysfs_mnt = sysfs_mount;
+	sysfs_mount = (struct vfsmount *)SYSFS_MAGIC;
+	/* ve0.sysfs_sb is setup by sysfs_fill_super() */
+#endif
+}
+
 static struct super_operations sysfs_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= generic_delete_inode,
@@ -30,6 +40,14 @@ static struct sysfs_dirent sysfs_root = 
 	.s_ino		= 1,
 };
 
+#ifdef CONFIG_VE
+static void init_ve0_sysfs_root(void)
+{
+	get_ve0()->sysfs_root = &sysfs_root;
+}
+#define sysfs_root (*(get_exec_env()->sysfs_root))
+#endif
+
 static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct inode *inode;
@@ -39,7 +57,7 @@ static int sysfs_fill_super(struct super
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = SYSFS_MAGIC;
 	sb->s_op = &sysfs_ops;
-	sysfs_sb = sb;
+	visible_sysfs_sb = sb;
 
 	inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
 				 &sysfs_root);
@@ -70,16 +88,21 @@ static struct super_block *sysfs_get_sb(
 	return get_sb_single(fs_type, flags, data, sysfs_fill_super);
 }
 
-static struct file_system_type sysfs_fs_type = {
+struct file_system_type sysfs_fs_type = {
 	.name		= "sysfs",
 	.get_sb		= sysfs_get_sb,
 	.kill_sb	= kill_litter_super,
 };
 
+EXPORT_SYMBOL(sysfs_fs_type);
+
 int __init sysfs_init(void)
 {
 	int err;
 
+#ifdef CONFIG_VE
+	init_ve0_sysfs_root();
+#endif
 	err = register_filesystem(&sysfs_fs_type);
 	if (!err) {
 		sysfs_mount = kern_mount(&sysfs_fs_type);
@@ -89,5 +112,6 @@ int __init sysfs_init(void)
 			sysfs_mount = NULL;
 		}
 	}
+	prepare_sysfs();
 	return err;
 }
diff -Nurap linux-2.6.9-100.orig/fs/sysfs/symlink.c linux-2.6.9-ve023stab054/fs/sysfs/symlink.c
--- linux-2.6.9-100.orig/fs/sysfs/symlink.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/sysfs/symlink.c	2011-06-15 19:26:19.000000000 +0400
@@ -84,6 +84,10 @@ int sysfs_create_link(struct kobject * k
 	struct dentry * dentry = kobj->dentry;
 	int error = -EEXIST;
 
+#ifdef CONFIG_VE
+	if (!get_exec_env()->sysfs_sb)
+		return 0;
+#endif
 	BUG_ON(!kobj || !kobj->dentry || !name);
 
 	down(&dentry->d_inode->i_sem);
@@ -102,6 +106,10 @@ int sysfs_create_link(struct kobject * k
 
 void sysfs_remove_link(struct kobject * kobj, char * name)
 {
+#ifdef CONFIG_VE
+	if (!get_exec_env()->sysfs_sb)
+		return;
+#endif
 	sysfs_hash_and_remove(kobj->dentry,name);
 }
 
diff -Nurap linux-2.6.9-100.orig/fs/sysfs/sysfs.h linux-2.6.9-ve023stab054/fs/sysfs/sysfs.h
--- linux-2.6.9-100.orig/fs/sysfs/sysfs.h	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/sysfs/sysfs.h	2011-06-15 19:26:19.000000000 +0400
@@ -1,5 +1,13 @@
 
-extern struct vfsmount * sysfs_mount;
+#ifndef CONFIG_VE
+extern struct vfsmount *sysfs_mount;
+extern struct super_block *sysfs_sb;
+#define visible_sysfs_mount sysfs_mount
+#define visible_sysfs_sb sysfs_sb
+#else
+#define visible_sysfs_mount (get_exec_env()->sysfs_mnt)
+#define visible_sysfs_sb  (get_exec_env()->sysfs_sb)
+#endif
 
 extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
 extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
diff -Nurap linux-2.6.9-100.orig/fs/udf/file.c linux-2.6.9-ve023stab054/fs/udf/file.c
--- linux-2.6.9-100.orig/fs/udf/file.c	2004-10-19 01:54:32.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/udf/file.c	2011-06-15 19:26:19.000000000 +0400
@@ -188,7 +188,7 @@ int udf_ioctl(struct inode *inode, struc
 {
 	int result = -EINVAL;
 
-	if ( permission(inode, MAY_READ, NULL) != 0 )
+	if ( permission(inode, MAY_READ, NULL, NULL) != 0 )
 	{
 		udf_debug("no permission to access inode %lu\n",
 						inode->i_ino);
diff -Nurap linux-2.6.9-100.orig/fs/vzdq_file.c linux-2.6.9-ve023stab054/fs/vzdq_file.c
--- linux-2.6.9-100.orig/fs/vzdq_file.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/fs/vzdq_file.c	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,895 @@
+/*
+ *
+ * Copyright (C) 2005 SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo quota files as proc entry implementation.
+ * It is required for std quota tools to work correctly as they are expecting
+ * aquota.user and aquota.group files.
+ */
+
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/sysctl.h>
+#include <linux/mount.h>
+#include <linux/namespace.h>
+#include <linux/quotaio_v2.h>
+#include <asm/uaccess.h>
+
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/vzdq_tree.h>
+#include <linux/vzquota.h>
+
+/* ----------------------------------------------------------------------
+ *
+ * File read operation
+ *
+ * FIXME: functions in this section (as well as many functions in vzdq_ugid.c,
+ * perhaps) abuse vz_quota_sem.
+ * Taking a global semaphore for lengthy and user-controlled operations inside
+ * VPSs is not a good idea in general.
+ * In this case, the reasons for taking this semaphore are completely unclear,
+ * especially taking into account that the only function that has comments
+ * about the necessity to be called under this semaphore
+ * (create_proc_quotafile) is actually called OUTSIDE it.
+ *
+ * --------------------------------------------------------------------- */
+
+#define DQBLOCK_SIZE		1024
+#define DQUOTBLKNUM		21U
+#define DQTREE_DEPTH		4
+#define TREENUM_2_BLKNUM(num)	(((num) + 1) << 1)
+#define ISINDBLOCK(num)		((num)%2 != 0)
+#define FIRST_DATABLK	  	2  /* first even number */
+#define LAST_IND_LEVEL		(DQTREE_DEPTH - 1)
+#define CONVERT_LEVEL(level)	((level) * (QUOTAID_EBITS/QUOTAID_BBITS))
+#define GETLEVINDX(ind, lev)	(((ind) >> QUOTAID_BBITS*(lev)) \
+					& QUOTATREE_BMASK)
+
+#if (QUOTAID_EBITS / QUOTAID_BBITS) != (QUOTATREE_DEPTH / DQTREE_DEPTH)
+#error xBITS and DQTREE_DEPTH do not correspond
+#endif
+
+#define BLOCK_NOT_FOUND	1
+
+/* data for quota file -- one per proc entry */
+struct quotatree_data {
+	struct list_head	list;
+	struct vz_quota_master	*qmblk;
+	int			type;	/* type of the tree */
+};
+
+/* serialized by vz_quota_sem */
+static LIST_HEAD(qf_data_head);
+
+static const u_int32_t vzquota_magics[] = V2_INITQMAGICS;
+static const u_int32_t vzquota_versions[] = V2_INITQVERSIONS;
+
+static inline loff_t get_depoff(int depth)
+{
+	loff_t res = 1;
+	while (depth) {
+		res += (1 << ((depth - 1)*QUOTAID_EBITS + 1));
+		depth--;
+	}
+	return res;
+}
+
+static inline loff_t get_blknum(loff_t num, int depth)
+{
+	loff_t res;
+	res = (num << 1) + get_depoff(depth);
+	return res;
+}
+
+static int get_depth(loff_t num)
+{
+	int i;
+	for (i = 0; i < DQTREE_DEPTH; i++) {
+		if (num >= get_depoff(i) && (i == DQTREE_DEPTH - 1
+				|| num < get_depoff(i + 1)))
+			return i;
+	}
+	return -1;
+}
+
+static inline loff_t get_offset(loff_t num)
+{
+	loff_t res, tmp;
+
+	tmp = get_depth(num);
+	if (tmp < 0)
+		return -1;
+	num -= get_depoff(tmp);
+	BUG_ON(num < 0);
+	res = num >> 1;
+
+	return res;
+}
+
+static inline loff_t get_quot_blk_num(struct quotatree_tree *tree, int level)
+{
+	/* return maximum available block num */
+	return tree->levels[level].freenum;
+}
+
+static inline loff_t get_block_num(struct quotatree_tree *tree)
+{
+	loff_t ind_blk_num, quot_blk_num, max_ind, max_quot;
+
+	quot_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH) - 1);
+	max_quot = TREENUM_2_BLKNUM(quot_blk_num);
+	ind_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH - 1));
+	max_ind = (quot_blk_num) ? get_blknum(ind_blk_num, LAST_IND_LEVEL)
+		: get_blknum(ind_blk_num, 0);
+
+	return (max_ind > max_quot) ? max_ind + 1 : max_quot + 1;
+}
+
+/*  Write quota file header */
+static int read_header(void *buf, struct quotatree_tree *tree,
+	struct dq_info *dq_ugid_info, int type)
+{
+	struct v2_disk_dqheader *dqh;
+	struct v2_disk_dqinfo *dq_disk_info;
+
+	dqh = buf;
+	dq_disk_info = buf + sizeof(struct v2_disk_dqheader);
+
+	dqh->dqh_magic = vzquota_magics[type];
+	dqh->dqh_version = vzquota_versions[type];
+
+	dq_disk_info->dqi_bgrace = dq_ugid_info[type].bexpire;
+	dq_disk_info->dqi_igrace = dq_ugid_info[type].iexpire;
+	dq_disk_info->dqi_flags = 0;	/* no flags */
+	dq_disk_info->dqi_blocks = get_block_num(tree);
+	dq_disk_info->dqi_free_blk = 0;	/* first block in the file */
+	dq_disk_info->dqi_free_entry = FIRST_DATABLK;
+
+	return 0;
+}
+
+static int get_block_child(int depth, struct quotatree_node *p, u_int32_t *buf)
+{
+	int i, j, lev_num;
+
+	lev_num = QUOTATREE_DEPTH/DQTREE_DEPTH - 1;
+	for (i = 0; i < BLOCK_SIZE/sizeof(u_int32_t); i++) {
+		struct quotatree_node *next, *parent;
+
+		parent = p;
+		next = p;
+		for (j = lev_num; j >= 0; j--) {
+			if (!next->blocks[GETLEVINDX(i,j)]) {
+				buf[i] = 0;
+				goto bad_branch;
+			}
+			parent = next;
+			next = next->blocks[GETLEVINDX(i,j)];
+		}
+		buf[i] = (depth == DQTREE_DEPTH - 1) ?
+			TREENUM_2_BLKNUM(parent->num)
+			: get_blknum(next->num, depth + 1);
+
+	bad_branch:
+		;
+	}
+
+	return 0;
+}
+
+/*
+ * Write index block to disk (or buffer)
+ * @buf has length 256*sizeof(u_int32_t) bytes
+ */
+static int read_index_block(int num, u_int32_t *buf,
+		struct quotatree_tree *tree)
+{
+	struct quotatree_node *p;
+	u_int32_t index;
+	loff_t off;
+	int depth, res;
+
+	res = BLOCK_NOT_FOUND; 
+	index = 0;
+	depth = get_depth(num);
+	off = get_offset(num);
+	if (depth < 0 || off < 0)
+		return -EINVAL;
+
+	list_for_each_entry(p, &tree->levels[CONVERT_LEVEL(depth)].usedlh,
+			list) {
+		if (p->num >= off)
+			res = 0;
+		if (p->num != off)
+			continue;
+		get_block_child(depth, p, buf);
+		break;
+	}
+
+	return res;
+}
+
+static inline void convert_quot_format(struct v2_disk_dqblk *dq,
+		struct vz_quota_ugid *vzq)
+{
+	dq->dqb_id = vzq->qugid_id;
+	dq->dqb_ihardlimit = vzq->qugid_stat.ihardlimit;
+	dq->dqb_isoftlimit = vzq->qugid_stat.isoftlimit;
+	dq->dqb_curinodes = vzq->qugid_stat.icurrent;
+	dq->dqb_bhardlimit = vzq->qugid_stat.bhardlimit / QUOTABLOCK_SIZE;
+	dq->dqb_bsoftlimit = vzq->qugid_stat.bsoftlimit / QUOTABLOCK_SIZE;
+	dq->dqb_curspace = vzq->qugid_stat.bcurrent;
+	dq->dqb_btime = vzq->qugid_stat.btime;
+	dq->dqb_itime = vzq->qugid_stat.itime;
+}
+
+static int read_dquot(loff_t num, void *buf, struct quotatree_tree *tree)
+{
+	int res, i, entries = 0;
+	struct v2_disk_dqdbheader *dq_header;
+	struct quotatree_node *p;
+	struct v2_disk_dqblk *blk = buf + sizeof(struct v2_disk_dqdbheader);
+
+	res = BLOCK_NOT_FOUND;
+	dq_header = buf;
+	memset(dq_header, 0, sizeof(*dq_header));
+
+	list_for_each_entry(p, &(tree->levels[QUOTATREE_DEPTH - 1].usedlh),
+			list) {
+		if (TREENUM_2_BLKNUM(p->num) >= num)
+			res = 0;
+		if (TREENUM_2_BLKNUM(p->num) != num)
+			continue;
+
+		for (i = 0; i < QUOTATREE_BSIZE; i++) {
+			if (!p->blocks[i])
+				continue;
+			convert_quot_format(blk + entries,
+					(struct vz_quota_ugid *)p->blocks[i]);
+			entries++;
+			res = 0;
+		}
+		break;
+	}
+	dq_header->dqdh_entries = entries;
+
+	return res;
+}
+
+static int read_block(int num, void *buf, struct quotatree_tree *tree,
+	struct dq_info *dq_ugid_info, int magic)
+{
+	int res;
+
+	memset(buf, 0, DQBLOCK_SIZE);
+	if (!num)
+		res = read_header(buf, tree, dq_ugid_info, magic);
+	else if (ISINDBLOCK(num))
+		res = read_index_block(num, (u_int32_t*)buf, tree);
+	else
+		res = read_dquot(num, buf, tree);
+
+	return res;
+}
+
+/*
+ * FIXME: this function can handle quota files up to 2GB only.
+ */
+static int read_proc_quotafile(char *page, char **start, off_t off, int count,
+		int *eof, void *data)
+{
+	off_t blk_num, blk_off, buf_off;
+	char *tmp;
+	size_t buf_size;
+	struct quotatree_data *qtd;
+	struct quotatree_tree *tree;
+	struct dq_info *dqi;
+	int res;
+
+	tmp = kmalloc(DQBLOCK_SIZE, GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	qtd = data;
+	down(&vz_quota_sem);
+	down(&qtd->qmblk->dq_sem);
+
+	res = 0;
+	tree = QUGID_TREE(qtd->qmblk, qtd->type);
+	if (!tree) {
+		*eof = 1;
+		goto out_dq;
+	}
+
+	dqi = &qtd->qmblk->dq_ugid_info[qtd->type];
+
+	buf_off = 0;
+	buf_size = count;
+	blk_num = off / DQBLOCK_SIZE;
+	blk_off = off % DQBLOCK_SIZE;
+
+	while (buf_size > 0) {
+		off_t len;
+
+		len = min((size_t)(DQBLOCK_SIZE-blk_off), buf_size);
+		res = read_block(blk_num, tmp, tree, dqi, qtd->type);
+		if (res < 0)
+			goto out_err;
+		if (res == BLOCK_NOT_FOUND) {
+			*eof = 1;
+			break;
+		} 
+		memcpy(page + buf_off, tmp + blk_off, len);
+
+		blk_num++;
+		buf_size -= len;
+		blk_off = 0;
+		buf_off += len;
+	}
+	res = buf_off;
+
+out_err:
+	*start = NULL + count;
+out_dq:
+	up(&qtd->qmblk->dq_sem);
+	up(&vz_quota_sem);
+	kfree(tmp);
+
+	return res;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * /proc/vz/vzaquota/QID/aquota.* files
+ *
+ * FIXME: this code lacks serialization of read/readdir/lseek.
+ * However, this problem should be fixed after the mainstream issue of what
+ * appears to be non-atomic read and update of file position in sys_read.
+ *
+ * --------------------------------------------------------------------- */
+
+static inline unsigned long vzdq_aquot_getino(dev_t dev)
+{
+	return 0xec000000UL + dev;
+}
+
+static inline dev_t vzdq_aquot_getidev(struct inode *inode)
+{
+	return (dev_t)(unsigned long)PROC_I(inode)->op.proc_get_link;
+}
+
+static inline void vzdq_aquot_setidev(struct inode *inode, dev_t dev)
+{
+	PROC_I(inode)->op.proc_get_link = (void *)(unsigned long)dev;
+}
+
+static ssize_t vzdq_aquotf_read(struct file *file,
+		char __user *buf, size_t size, loff_t *ppos)
+{
+	char *page;
+	size_t bufsize;
+	ssize_t l, l2, copied;
+	char *start;
+	struct inode *inode;
+	struct block_device *bdev;
+	struct super_block *sb;
+	struct quotatree_data data;
+	int eof, err;
+
+	err = -ENOMEM;
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (page == NULL)
+		goto out_err;
+
+	err = -ENODEV;
+	inode = file->f_dentry->d_inode;
+	bdev = bdget(vzdq_aquot_getidev(inode));
+	if (bdev == NULL)
+		goto out_err;
+	sb = get_super(bdev);
+	bdput(bdev);
+	if (sb == NULL)
+		goto out_err;
+	data.qmblk = vzquota_find_qmblk(sb);
+	data.type = PROC_I(inode)->type - 1;
+	drop_super(sb);
+	if (data.qmblk == NULL || data.qmblk == VZ_QUOTA_BAD)
+		goto out_err;
+
+	copied = 0;
+	l = l2 = 0;
+	while (1) {
+		bufsize = min(size, (size_t)PAGE_SIZE);
+		if (bufsize <= 0)
+			break;
+
+		l = read_proc_quotafile(page, &start, *ppos, bufsize,
+				&eof, &data);
+		if (l <= 0)
+			break;
+
+		l2 = copy_to_user(buf, page, l);
+		copied += l - l2;
+		if (l2)
+			break;
+
+		buf += l;
+		size -= l;
+		*ppos += (unsigned long)start;
+		l = l2 = 0;
+	}
+
+	qmblk_put(data.qmblk);
+	free_page((unsigned long)page);
+	if (copied)
+		return copied;
+	else if (l2)		/* last copy_to_user failed */
+		return -EFAULT;
+	else			/* read error or EOF */
+		return l;
+
+out_err:
+	if (page != NULL)
+		free_page((unsigned long)page);
+	return err;
+}
+
+static struct file_operations vzdq_aquotf_file_operations = {
+	.read		= &vzdq_aquotf_read,
+};
+
+static struct inode_operations vzdq_aquotf_inode_operations = {
+};
+
+
+/* ----------------------------------------------------------------------
+ *
+ * /proc/vz/vzaquota/QID directory
+ *
+ * --------------------------------------------------------------------- */
+
+static int vzdq_aquotq_readdir(struct file *file, void *data, filldir_t filler)
+{
+	loff_t n;
+	int err;
+
+	n = file->f_pos;
+	for (err = 0; !err; n++) {
+		/* ppc32 can't cmp 2 long long's in switch, calls __cmpdi2() */
+		switch ((unsigned long)n) {
+		case 0:
+			err = (*filler)(data, ".", 1, n,
+					file->f_dentry->d_inode->i_ino,
+					DT_DIR);
+			break;
+		case 1:
+			err = (*filler)(data, "..", 2, n,
+					parent_ino(file->f_dentry), DT_DIR);
+			break;
+		case 2:
+			err = (*filler)(data, "aquota.user", 11, n,
+					file->f_dentry->d_inode->i_ino
+								+ USRQUOTA + 1,
+					DT_REG);
+			break;
+		case 3:
+			err = (*filler)(data, "aquota.group", 12, n,
+					file->f_dentry->d_inode->i_ino 
+								+ GRPQUOTA + 1,
+					DT_REG);
+			break;
+		default:
+			goto out;
+		}
+	}
+out:
+	file->f_pos = n;
+	return err;
+}
+
+struct vzdq_aquotq_lookdata {
+	dev_t dev;
+	int type;
+	struct vz_quota_master *qmblk;
+};
+
+static int vzdq_aquotq_looktest(struct inode *inode, void *data)
+{
+	struct vzdq_aquotq_lookdata *d;
+
+	d = data;
+	return inode->i_op == &vzdq_aquotf_inode_operations &&
+	       vzdq_aquot_getidev(inode) == d->dev &&
+	       PROC_I(inode)->type == d->type + 1;
+}
+
+static int vzdq_aquotq_lookset(struct inode *inode, void *data)
+{
+	struct vzdq_aquotq_lookdata *d;
+	struct quotatree_tree *tree;
+
+	d = data;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->i_ino = vzdq_aquot_getino(d->dev) + d->type + 1;
+	inode->i_mode = S_IFREG | S_IRUSR;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+	inode->i_nlink = 1;
+	inode->i_op = &vzdq_aquotf_inode_operations;
+	inode->i_fop = &vzdq_aquotf_file_operations;
+	PROC_I(inode)->type = d->type + 1;
+	vzdq_aquot_setidev(inode, d->dev);
+
+	/* Setting size */
+	tree = QUGID_TREE(d->qmblk, d->type);
+	inode->i_size = get_block_num(tree) * 1024;
+	return 0;
+}
+
+static int vzdq_aquotq_revalidate(struct dentry *vdentry, struct nameidata *nd)
+{
+	return 0;
+}
+
+static struct dentry_operations vzdq_aquotq_dentry_operations = {
+	.d_revalidate	= &vzdq_aquotq_revalidate,
+};
+
+static struct vz_quota_master *find_qmblk_by_dev(dev_t dev)
+{
+	struct super_block *sb;
+	struct vz_quota_master *qmblk;
+
+	qmblk = NULL;
+	sb = user_get_super(dev);
+	if (sb != NULL) {
+		qmblk = vzquota_find_qmblk(sb);
+		drop_super(sb);
+
+		if (qmblk == VZ_QUOTA_BAD)
+			qmblk = NULL;
+	}
+
+	return qmblk;
+}
+
+static struct dentry *vzdq_aquotq_lookup(struct inode *dir,
+		struct dentry *dentry,
+		struct nameidata *nd)
+{
+	struct inode *inode;
+	struct vzdq_aquotq_lookdata d;
+	int k;
+
+	if (dentry->d_name.len == 11) {
+		if (memcmp(dentry->d_name.name, "aquota.user", 11))
+			goto out;
+		k = USRQUOTA;
+	} else if (dentry->d_name.len == 12) {
+		if (memcmp(dentry->d_name.name, "aquota.group", 12))
+			goto out;
+		k = GRPQUOTA;
+	} else
+		goto out;
+	d.dev = vzdq_aquot_getidev(dir);
+	d.type = k;
+	d.qmblk = find_qmblk_by_dev(d.dev);
+	if (d.qmblk == NULL)
+		goto out;
+
+	inode = iget5_locked(dir->i_sb, dir->i_ino + k + 1,
+			vzdq_aquotq_looktest, vzdq_aquotq_lookset, &d);
+
+	/* qmlbk ref is not needed, we used it for i_size calculation only */
+	qmblk_put(d.qmblk);
+	if (inode == NULL)
+		goto out;
+
+	unlock_new_inode(inode);
+	dentry->d_op = &vzdq_aquotq_dentry_operations;
+	d_add(dentry, inode);
+	return NULL;
+
+out:
+	return ERR_PTR(-ENOENT);
+}
+
+static struct file_operations vzdq_aquotq_file_operations = {
+	.read		= &generic_read_dir,
+	.readdir	= &vzdq_aquotq_readdir,
+};
+
+static struct inode_operations vzdq_aquotq_inode_operations = {
+	.lookup		= &vzdq_aquotq_lookup,
+};
+
+
+/* ----------------------------------------------------------------------
+ *
+ * /proc/vz/vzaquota directory
+ *
+ * --------------------------------------------------------------------- */
+
+struct vzdq_aquot_de {
+	struct list_head list;
+	struct vfsmount *mnt;
+};
+
+static int vzdq_aquot_buildmntlist(struct ve_struct *ve,
+		struct list_head *head)
+{
+	struct vfsmount *rmnt, *mnt;
+	struct vzdq_aquot_de *p;
+	int err;
+
+#ifdef CONFIG_VE
+	rmnt = mntget(ve->fs_rootmnt);
+#else
+	read_lock(&current->fs->lock);
+	rmnt = mntget(current->fs->rootmnt);
+	read_unlock(&current->fs->lock);
+#endif
+	mnt = rmnt;
+	down_read(&rmnt->mnt_namespace->sem);
+	while (1) {
+		list_for_each_entry(p, head, list) {
+			if (p->mnt->mnt_sb == mnt->mnt_sb)
+				goto skip;
+		}
+
+		err = -ENOMEM;
+		p = kmalloc(sizeof(*p), GFP_ATOMIC);
+		if (p == NULL)
+			goto out;
+		p->mnt = mntget(mnt);
+		list_add_tail(&p->list, head);
+
+skip:
+		err = 0;
+		if (list_empty(&mnt->mnt_mounts)) {
+			while (1) {
+				if (mnt == rmnt)
+					goto out;
+				if (mnt->mnt_child.next !=
+						&mnt->mnt_parent->mnt_mounts)
+					break;
+				mnt = mnt->mnt_parent;
+			}
+			mnt = list_entry(mnt->mnt_child.next,
+					struct vfsmount, mnt_child);
+		} else
+			mnt = list_first_entry(&mnt->mnt_mounts,
+					struct vfsmount, mnt_child);
+	}
+out:
+	up_read(&rmnt->mnt_namespace->sem);
+	mntput(rmnt);
+	return err;
+}
+
+static void vzdq_aquot_releasemntlist(struct ve_struct *ve,
+		struct list_head *head)
+{
+	struct vzdq_aquot_de *p;
+
+	while (!list_empty(head)) {
+		p = list_first_entry(head, typeof(*p), list);
+		mntput(p->mnt);
+		list_del(&p->list);
+		kfree(p);
+	}
+}
+
+static int vzdq_aquotd_readdir(struct file *file, void *data, filldir_t filler)
+{
+	struct ve_struct *ve, *old_ve;
+	struct list_head mntlist;
+	struct vzdq_aquot_de *de;
+	struct super_block *sb;
+	struct vz_quota_master *qmblk;
+	loff_t i, n;
+	char buf[24];
+	int l, err;
+
+	i = 0;
+	n = file->f_pos;
+	ve = VE_OWNER_FSTYPE(file->f_dentry->d_sb->s_type);
+	old_ve = set_exec_env(ve);
+
+	INIT_LIST_HEAD(&mntlist);
+#ifdef CONFIG_VE
+	/*
+	 * The only reason of disabling readdir for the host system is that
+	 * this readdir can be slow and CPU consuming with large number of VPSs
+	 * (or just mount points).
+	 */
+	err = ve_is_super(ve);
+#else
+	err = 0;
+#endif
+	if (!err) {
+		err = vzdq_aquot_buildmntlist(ve, &mntlist);
+		if (err)
+			goto out_err;
+	}
+
+	if (i >= n) {
+		if ((*filler)(data, ".", 1, i,
+					file->f_dentry->d_inode->i_ino, DT_DIR))
+			goto out_fill;
+	}
+	i++;
+
+	if (i >= n) {
+		if ((*filler)(data, "..", 2, i,
+					parent_ino(file->f_dentry), DT_DIR))
+			goto out_fill;
+	}
+	i++;
+
+	list_for_each_entry (de, &mntlist, list) {
+		sb = de->mnt->mnt_sb;
+#ifdef CONFIG_VE
+		if (get_device_perms_ve(S_IFBLK, sb->s_dev, FMODE_QUOTACTL))
+			continue;
+#endif
+		qmblk = vzquota_find_qmblk(sb);
+		if (qmblk == NULL || qmblk == VZ_QUOTA_BAD)
+			continue;
+
+		qmblk_put(qmblk);
+		i++;
+		if (i <= n)
+			continue;
+
+		l = sprintf(buf, "%08x", new_encode_dev(sb->s_dev));
+		if ((*filler)(data, buf, l, i - 1,
+					vzdq_aquot_getino(sb->s_dev), DT_DIR))
+			break;
+	}
+
+out_fill:
+	err = 0;
+	file->f_pos = i;
+out_err:
+	vzdq_aquot_releasemntlist(ve, &mntlist);
+	set_exec_env(old_ve);
+	return err;
+}
+
+static int vzdq_aquotd_looktest(struct inode *inode, void *data)
+{
+	return inode->i_op == &vzdq_aquotq_inode_operations &&
+	       vzdq_aquot_getidev(inode) == (dev_t)(unsigned long)data;
+}
+
+static int vzdq_aquotd_lookset(struct inode *inode, void *data)
+{
+	dev_t dev;
+
+	dev = (dev_t)(unsigned long)data;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->i_ino = vzdq_aquot_getino(dev);
+	inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+	inode->i_nlink = 2;
+	inode->i_op = &vzdq_aquotq_inode_operations;
+	inode->i_fop = &vzdq_aquotq_file_operations;
+	vzdq_aquot_setidev(inode, dev);
+	return 0;
+}
+
+static struct dentry *vzdq_aquotd_lookup(struct inode *dir,
+		struct dentry *dentry,
+		struct nameidata *nd)
+{
+	struct ve_struct *ve, *old_ve;
+	const unsigned char *s;
+	int l;
+	dev_t dev;
+	struct inode *inode;
+
+	ve = VE_OWNER_FSTYPE(dir->i_sb->s_type);
+	old_ve = set_exec_env(ve);
+#ifdef CONFIG_VE
+	/*
+	 * Lookup is much lighter than readdir, so it can be allowed for the
+	 * host system.  But it would be strange to be able to do lookup only
+	 * without readdir...
+	 */
+	if (ve_is_super(ve))
+		goto out;
+#endif
+
+	dev = 0;
+	l = dentry->d_name.len;
+	if (l <= 0)
+		goto out;
+	for (s = dentry->d_name.name; l > 0; s++, l--) {
+		if (!isxdigit(*s))
+			goto out;
+		if (dev & ~(~0UL >> 4))
+			goto out;
+		dev <<= 4;
+		if (isdigit(*s))
+			dev += *s - '0';
+		else if (islower(*s))
+			dev += *s - 'a' + 10;
+		else
+			dev += *s - 'A' + 10;
+	}
+	dev = new_decode_dev(dev);
+
+#ifdef CONFIG_VE
+	if (get_device_perms_ve(S_IFBLK, dev, FMODE_QUOTACTL))
+		goto out;
+#endif
+
+	inode = iget5_locked(dir->i_sb, vzdq_aquot_getino(dev),
+			vzdq_aquotd_looktest, vzdq_aquotd_lookset,
+			(void *)(unsigned long)dev);
+	if (inode == NULL)
+		goto out;
+	unlock_new_inode(inode);
+
+	d_add(dentry, inode);
+	set_exec_env(old_ve);
+	return NULL;
+
+out:
+	set_exec_env(old_ve);
+	return ERR_PTR(-ENOENT);
+}
+
+static struct file_operations vzdq_aquotd_file_operations = {
+	.read		= &generic_read_dir,
+	.readdir	= &vzdq_aquotd_readdir,
+};
+
+static struct inode_operations vzdq_aquotd_inode_operations = {
+	.lookup		= &vzdq_aquotd_lookup,
+};
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Initialization and deinitialization
+ *
+ * --------------------------------------------------------------------- */
+
+/*
+ * FIXME: creation of proc entries here is unsafe with respect to module
+ * unloading.
+ */
+void vzaquota_init(void)
+{
+	struct proc_dir_entry *de;
+
+	de = create_proc_glob_entry("vz/vzaquota",
+			S_IFDIR | S_IRUSR | S_IXUSR, NULL);
+	if (de != NULL) {
+		de->proc_iops = &vzdq_aquotd_inode_operations;
+		de->proc_fops = &vzdq_aquotd_file_operations;
+	} else
+		printk("VZDQ: vz/vzaquota creation failed\n");
+#if defined(CONFIG_SYSCTL)
+	de = create_proc_glob_entry("sys/fs/quota",
+			S_IFDIR | S_IRUSR | S_IXUSR, NULL);
+	if (de == NULL)
+		printk("VZDQ: sys/fs/quota creation failed\n");
+#endif
+}
+
+void vzaquota_fini(void)
+{
+	remove_proc_entry("vz/vzaquota", NULL);
+}
diff -Nurap linux-2.6.9-100.orig/fs/vzdq_mgmt.c linux-2.6.9-ve023stab054/fs/vzdq_mgmt.c
--- linux-2.6.9-100.orig/fs/vzdq_mgmt.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/fs/vzdq_mgmt.c	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,727 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <asm/semaphore.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/writeback.h>
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/quota.h>
+#include <linux/vzctl_quota.h>
+#include <linux/vzquota.h>
+
+
+/* ----------------------------------------------------------------------
+ * Switching quota on.
+ * --------------------------------------------------------------------- */
+
+/*
+ * check limits copied from user
+ */
+int vzquota_check_sane_limits(struct dq_stat *qstat)
+{
+	int err;
+
+	err = -EINVAL;
+
+	/* softlimit must not exceed hardlimit */
+	if (qstat->bsoftlimit > qstat->bhardlimit)
+		goto out;
+
+	if (qstat->isoftlimit > qstat->ihardlimit)
+		goto out;
+
+	err = 0;
+out:
+	return err;
+}
+
+/*
+ * check usage values copied from user
+ */
+int vzquota_check_sane_values(struct dq_stat *qstat)
+{
+	int err;
+
+	err = -EINVAL;
+
+	/* expiration time must not be set if softlimit was not exceeded */
+	if (qstat->bcurrent < qstat->bsoftlimit && qstat->btime != (time_t)0)
+		goto out;
+
+	if (qstat->icurrent < qstat->isoftlimit && qstat->itime != (time_t)0)
+		goto out;
+
+	err = vzquota_check_sane_limits(qstat);
+out:
+	return err;
+}
+
+/*
+ * create new quota master block
+ * this function should:
+ *  - copy limits and usage parameters from user buffer;
+ *  - allocate, initialize quota block and insert it into hash;
+ */
+static int vzquota_create(unsigned int quota_id, struct vz_quota_stat *u_qstat)
+{
+	int err;
+	struct vz_quota_stat qstat;
+	struct vz_quota_master *qmblk;
+
+	down(&vz_quota_sem);
+
+	err = -EFAULT;
+	if (copy_from_user(&qstat, u_qstat, sizeof(qstat)))
+		goto out;
+
+	err = -EINVAL;
+	if (quota_id == 0)
+		goto out;
+
+	if (vzquota_check_sane_values(&qstat.dq_stat))
+		goto out;
+	err = 0;
+	qmblk = vzquota_alloc_master(quota_id, &qstat);
+
+	if (IS_ERR(qmblk)) /* ENOMEM or EEXIST */
+		err = PTR_ERR(qmblk);
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+/**
+ * vzquota_on - turn quota on
+ *
+ * This function should:
+ *  - find and get refcnt of directory entry for quota root and corresponding
+ *    mountpoint;
+ *  - find corresponding quota block and mark it with given path;
+ *  - check quota tree;
+ *  - initialize quota for the tree root.
+ */
+static int vzquota_on(unsigned int quota_id, const char *quota_root)
+{
+	int err;
+	struct nameidata nd;
+	struct vz_quota_master *qmblk;
+	struct super_block *dqsb;
+
+	dqsb = NULL;
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EBUSY;
+	if (qmblk->dq_state != VZDQ_STARTING)
+		goto out;
+
+	err = user_path_walk(quota_root, &nd);
+	if (err)
+		goto out;
+	/* init path must be a directory */
+	err = -ENOTDIR;
+	if (!S_ISDIR(nd.dentry->d_inode->i_mode))
+		goto out_path;
+
+	qmblk->dq_root_dentry = nd.dentry;
+	qmblk->dq_root_mnt = nd.mnt;
+	qmblk->dq_sb = nd.dentry->d_inode->i_sb;
+	err = vzquota_get_super(qmblk->dq_sb);
+	if (err)
+		goto out_super;
+
+	/*
+	 * Serialization with quota initialization and operations is performed
+	 * through generation check: generation is memorized before qmblk is
+	 * found and compared under inode_qmblk_lock with assignment.
+	 *
+	 * Note that the dentry tree is shrunk only for high-level logical
+	 * serialization, purely as a courtesy to the user: to have consistent
+	 * quota statistics, files should be closed etc. on quota on.
+	 */
+	err = vzquota_on_qmblk(qmblk->dq_sb, qmblk->dq_root_dentry->d_inode,
+			qmblk);
+	if (err)
+		goto out_init;
+	qmblk->dq_state = VZDQ_WORKING;
+
+	up(&vz_quota_sem);
+	return 0;
+
+out_init:
+	dqsb = qmblk->dq_sb;
+out_super:
+	/* clear for qmblk_put/quota_free_master */
+	qmblk->dq_sb = NULL;
+	qmblk->dq_root_dentry = NULL;
+	qmblk->dq_root_mnt = NULL;
+out_path:
+	path_release(&nd);
+out:
+	if (dqsb)
+		vzquota_put_super(dqsb);
+	up(&vz_quota_sem);
+	return err;
+}
+
+
+/* ----------------------------------------------------------------------
+ * Switching quota off.
+ * --------------------------------------------------------------------- */
+
+/*
+ * destroy quota block by ID
+ */
+static int vzquota_destroy(unsigned int quota_id)
+{
+	int err;
+	struct vz_quota_master *qmblk;
+	struct dentry *dentry;
+	struct vfsmount *mnt;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EBUSY;
+	if (qmblk->dq_state == VZDQ_WORKING)
+		goto out; /* quota_off first */
+
+	list_del_init(&qmblk->dq_hash);
+	dentry = qmblk->dq_root_dentry;
+	qmblk->dq_root_dentry = NULL;
+	mnt = qmblk->dq_root_mnt;
+	qmblk->dq_root_mnt = NULL;
+
+	if (qmblk->dq_sb)
+		vzquota_put_super(qmblk->dq_sb);
+	up(&vz_quota_sem);
+
+	qmblk_put(qmblk);
+	dput(dentry);
+	mntput(mnt);
+	return 0;
+
+out:
+	up(&vz_quota_sem);
+	return err;
+}
+
+/**
+ * vzquota_off - turn quota off
+ */
+
+static int __vzquota_sync_list(struct list_head *lh,
+		struct vz_quota_master *qmblk,
+		enum writeback_sync_modes sync_mode)
+{
+	struct writeback_control wbc;
+	LIST_HEAD(list);
+	struct vz_quota_ilink *qlnk;
+	struct inode *inode;
+	int err, ret;
+
+	memset(&wbc, 0, sizeof(wbc));
+	wbc.sync_mode = sync_mode;
+
+	err = ret = 0;
+	while (!list_empty(lh)) {
+		if (need_resched()) {
+			inode_qmblk_unlock(qmblk->dq_sb);
+			schedule();
+			inode_qmblk_lock(qmblk->dq_sb);
+			continue;
+		}
+
+		qlnk = list_first_entry(lh, struct vz_quota_ilink, list);
+		list_move(&qlnk->list, &list);
+
+		inode = igrab(QLNK_INODE(qlnk));
+		if (!inode)
+			continue;
+
+		inode_qmblk_unlock(qmblk->dq_sb);
+
+		wbc.nr_to_write = LONG_MAX;
+		ret = sync_inode(inode, &wbc);
+		if (ret)
+			err = ret;
+		iput(inode);
+
+		inode_qmblk_lock(qmblk->dq_sb);
+	}
+
+	list_splice(&list, lh);
+	return err;
+}
+
+static int vzquota_sync_list(struct list_head *lh,
+		struct vz_quota_master *qmblk)
+{
+	(void)__vzquota_sync_list(lh, qmblk, WB_SYNC_NONE);
+	return __vzquota_sync_list(lh, qmblk, WB_SYNC_ALL);
+}
+
+static int vzquota_sync_inodes(struct vz_quota_master *qmblk)
+{
+	int err;
+	LIST_HEAD(qlnk_list);
+
+	list_splice_init(&qmblk->dq_ilink_list, &qlnk_list);
+	err = vzquota_sync_list(&qlnk_list, qmblk);
+	if (!err && !list_empty(&qmblk->dq_ilink_list))
+		err = -EBUSY;
+	list_splice(&qlnk_list, &qmblk->dq_ilink_list);
+
+	return err;
+}
+
+static int vzquota_off(unsigned int quota_id)
+{
+	int err, ret;
+	struct vz_quota_master *qmblk;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EALREADY;
+	if (qmblk->dq_state != VZDQ_WORKING)
+		goto out;
+
+	inode_qmblk_lock(qmblk->dq_sb); /* protects dq_ilink_list also */
+	ret = vzquota_sync_inodes(qmblk);
+	inode_qmblk_unlock(qmblk->dq_sb);
+
+	err = vzquota_off_qmblk(qmblk->dq_sb, qmblk);
+	if (err)
+		goto out;
+
+	err = ret;
+	/* vzquota_destroy will free resources */
+	qmblk->dq_state = VZDQ_STOPING;
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+
+/* ----------------------------------------------------------------------
+ * Other VZQUOTA ioctl's.
+ * --------------------------------------------------------------------- */
+
+/*
+ * this function should:
+ * - set new limits/buffer under quota master block lock
+ * - if new softlimit is less than usage, then set expiration time
+ * - no need to alloc ugid hash table - we'll do that on demand
+ */
+int vzquota_update_limit(struct dq_stat *_qstat,
+		struct dq_stat *qstat)
+{
+	int err;
+
+	err = -EINVAL;
+	if (vzquota_check_sane_limits(qstat))
+		goto out;
+
+	err = 0;
+
+	/* limits */
+	_qstat->bsoftlimit = qstat->bsoftlimit;
+	_qstat->bhardlimit = qstat->bhardlimit;
+	/*
+	 * If the soft limit is exceeded, administrator can override the moment
+	 * when the grace period for limit exceeding ends.
+	 * Specifying the moment may be useful if the soft limit is set to be
+	 * lower than the current usage.  In the latter case, if the grace
+	 * period end isn't specified, the grace period will start from the
+	 * moment of the first write operation.
+	 * There is a race with the user level.  Soft limit may be already
+	 * exceeded before the limit change, and grace period end calculated by
+ * the kernel will be overridden.  User level may check if the limit is
+	 * already exceeded, but check and set calls are not atomic.
+ * This race isn't dangerous.  Under normal circumstances, the
+	 * difference between the grace period end calculated by the kernel and
+ * the user level should be no greater than the difference between
+	 * the moments of check and set calls, i.e. not bigger than the quota
+	 * timer resolution - 1 sec.
+	 */
+	if (qstat->btime != (time_t)0 &&
+			_qstat->bcurrent >= _qstat->bsoftlimit)
+		_qstat->btime = qstat->btime;
+
+	_qstat->isoftlimit = qstat->isoftlimit;
+	_qstat->ihardlimit = qstat->ihardlimit;
+	if (qstat->itime != (time_t)0 &&
+			_qstat->icurrent >= _qstat->isoftlimit)
+		_qstat->itime = qstat->itime;
+
+out:
+	return err;
+}
+
+/*
+ * set new quota limits.
+ * this function should:
+ *  copy new limits from user level
+ *  - find quota block
+ *  - set new limits and flags.
+ */
+static int vzquota_setlimit(unsigned int quota_id,
+		struct vz_quota_stat *u_qstat)
+{
+	int err;
+	struct vz_quota_stat qstat;
+	struct vz_quota_master *qmblk;
+
+	down(&vz_quota_sem); /* for hash list protection */
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (copy_from_user(&qstat, u_qstat, sizeof(qstat)))
+		goto out;
+
+	qmblk_data_write_lock(qmblk);
+	err = vzquota_update_limit(&qmblk->dq_stat, &qstat.dq_stat);
+	if (err == 0)
+		qmblk->dq_info = qstat.dq_info;
+	qmblk_data_write_unlock(qmblk);
+
+out:
+	up(&vz_quota_sem);
+	return err;
+}
+
+/*
+ * get quota limits.
+ * very simple - just return stat buffer to user
+ */
+static int vzquota_getstat(unsigned int quota_id,
+		struct vz_quota_stat *u_qstat)
+{
+	int err;
+	struct vz_quota_stat qstat;
+	struct vz_quota_master *qmblk;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	qmblk_data_read_lock(qmblk);
+	/* copy whole buffer under lock */
+	memcpy(&qstat.dq_stat, &qmblk->dq_stat, sizeof(qstat.dq_stat));
+	memcpy(&qstat.dq_info, &qmblk->dq_info, sizeof(qstat.dq_info));
+	qmblk_data_read_unlock(qmblk);
+
+	err = copy_to_user(u_qstat, &qstat, sizeof(qstat));
+	if (err)
+		err = -EFAULT;
+
+out:
+	up(&vz_quota_sem);
+	return err;
+}
+
+/*
+ * This is a system call to turn per-VE disk quota on.
+ * Note this call is allowed to run ONLY from VE0
+ */
+long do_vzquotactl(int cmd, unsigned int quota_id,
+			  struct vz_quota_stat *qstat, const char *ve_root)
+{
+	int ret;
+
+	ret = -EPERM;
+	/* access allowed only from root of VE0 */
+	if (!capable(CAP_SYS_RESOURCE) ||
+	    !capable(CAP_SYS_ADMIN))
+		goto out;
+
+	switch (cmd) {
+		case VZ_DQ_CREATE:
+			ret = vzquota_create(quota_id, qstat);
+			break;
+		case VZ_DQ_DESTROY:
+			ret = vzquota_destroy(quota_id);
+			break;
+		case VZ_DQ_ON:
+			ret = vzquota_on(quota_id, ve_root);
+			break;
+		case VZ_DQ_OFF:
+			ret = vzquota_off(quota_id);
+			break;
+		case VZ_DQ_SETLIMIT:
+			ret = vzquota_setlimit(quota_id, qstat);
+			break;
+		case VZ_DQ_GETSTAT:
+			ret = vzquota_getstat(quota_id, qstat);
+			break;
+
+		default:
+			ret = -EINVAL;
+			goto out;
+	}
+
+out:
+	return ret;
+}
+
+
+/* ----------------------------------------------------------------------
+ * Proc filesystem routines
+ * ---------------------------------------------------------------------*/
+
+#if defined(CONFIG_PROC_FS)
+
+#define QUOTA_UINT_LEN		15
+#define QUOTA_TIME_LEN_FMT_UINT	"%11u"
+#define QUOTA_NUM_LEN_FMT_UINT	"%15u"
+#define QUOTA_NUM_LEN_FMT_ULL	"%15Lu"
+#define QUOTA_TIME_LEN_FMT_STR	"%11s"
+#define QUOTA_NUM_LEN_FMT_STR	"%15s"
+#define QUOTA_PROC_MAX_LINE_LEN 2048
+
+/*
+ * prints /proc/ve_dq header line
+ */
+static int print_proc_header(char * buffer)
+{
+	return sprintf(buffer,
+		       "%-11s"
+		       QUOTA_NUM_LEN_FMT_STR
+		       QUOTA_NUM_LEN_FMT_STR
+		       QUOTA_NUM_LEN_FMT_STR
+		       QUOTA_TIME_LEN_FMT_STR
+		       QUOTA_TIME_LEN_FMT_STR
+		       "\n",
+		       "qid: path", 
+		       "usage", "softlimit", "hardlimit", "time", "expire");
+}
+
+/*
+ * prints proc master record id, dentry path
+ */
+static int print_proc_master_id(char * buffer, char * path_buf,
+		struct vz_quota_master * qp)
+{
+	char *path;
+	int over;
+
+	path = NULL;
+	switch (qp->dq_state) {
+		case VZDQ_WORKING:
+			if (!path_buf) {
+				path = "";
+				break;
+			}
+			path = d_path(qp->dq_root_dentry,
+				      qp->dq_root_mnt, path_buf, PAGE_SIZE);
+			if (IS_ERR(path)) {
+				path = "";
+				break;
+			}
+			/* do not print large path, truncate it */
+			over = strlen(path) -
+				(QUOTA_PROC_MAX_LINE_LEN - 3 - 3 -
+				 	QUOTA_UINT_LEN);
+			if (over > 0) {
+				path += over - 3;
+				path[0] = path[1] = path[3] = '.';
+			}
+			break;
+		case VZDQ_STARTING:
+			path = "-- started --";
+			break;
+		case VZDQ_STOPING:
+			path = "-- stopped --";
+			break;
+	}
+
+	return sprintf(buffer, "%u: %s\n", qp->dq_id, path);
+}
+
+/*
+ * prints struct vz_quota_stat data
+ */
+static int print_proc_stat(char * buffer, struct dq_stat *qs,
+		struct dq_info *qi)
+{
+	return sprintf(buffer,
+		       "%11s"
+		       QUOTA_NUM_LEN_FMT_ULL
+		       QUOTA_NUM_LEN_FMT_ULL
+		       QUOTA_NUM_LEN_FMT_ULL
+		       QUOTA_TIME_LEN_FMT_UINT
+		       QUOTA_TIME_LEN_FMT_UINT
+		       "\n"
+		       "%11s"
+		       QUOTA_NUM_LEN_FMT_UINT
+		       QUOTA_NUM_LEN_FMT_UINT
+		       QUOTA_NUM_LEN_FMT_UINT
+		       QUOTA_TIME_LEN_FMT_UINT
+		       QUOTA_TIME_LEN_FMT_UINT
+		       "\n",
+		       "1k-blocks",
+		       qs->bcurrent >> 10,
+		       qs->bsoftlimit >> 10,
+		       qs->bhardlimit >> 10,
+		       (unsigned int)qs->btime,
+		       (unsigned int)qi->bexpire,
+		       "inodes",
+		       qs->icurrent,
+		       qs->isoftlimit,
+		       qs->ihardlimit,
+		       (unsigned int)qs->itime,
+		       (unsigned int)qi->iexpire);
+}
+
+
+/*
+ * for /proc filesystem output
+ */
+static int vzquota_read_proc(char *page, char **start, off_t off, int count,
+			   int *eof, void *data)
+{
+	int len, i;
+	off_t printed = 0;
+	char *p = page;
+	struct vz_quota_master *qp;
+	struct vz_quota_ilink *ql2;
+	struct list_head *listp;
+	char *path_buf;
+
+	path_buf = (char*)__get_free_page(GFP_KERNEL);
+	if (path_buf == NULL)
+		return -ENOMEM;
+
+	len = print_proc_header(p);
+	printed += len;
+	if (off < printed) /* keep header in output */ {
+		*start = p + off;
+		p += len;
+	}
+
+	down(&vz_quota_sem);
+
+	/* traverse master hash table for all records */
+	for (i = 0; i < vzquota_hash_size; i++) {
+		list_for_each(listp, &vzquota_hash_table[i]) {
+			qp = list_entry(listp,
+					struct vz_quota_master, dq_hash);
+
+			/* Skip other VE's information if not root of VE0 */
+			if ((!capable(CAP_SYS_ADMIN) ||
+			     !capable(CAP_SYS_RESOURCE))) {
+				ql2 = INODE_QLNK(current->fs->root->d_inode);
+				if (ql2 == NULL || qp != ql2->qmblk)
+					continue;
+			}
+			/*
+			 * Now print the next record
+			 */
+			len = 0;
+			/* we print quotaid and path only in VE0 */
+			if (capable(CAP_SYS_ADMIN))
+				len += print_proc_master_id(p+len,path_buf, qp);
+			len += print_proc_stat(p+len, &qp->dq_stat,
+					&qp->dq_info);
+			printed += len;
+			/* skip unnecessary lines */
+			if (printed <= off)
+				continue;
+			p += len;
+			/* provide start offset */
+			if (*start == NULL)
+				*start = p + (off - printed);
+			/* have we printed all requested size? */
+			if (PAGE_SIZE - (p - page) < QUOTA_PROC_MAX_LINE_LEN ||
+			    (p - *start) >= count)
+				goto out;
+		}
+	}
+
+	*eof = 1; /* checked all hash */
+out:
+	up(&vz_quota_sem);
+
+	len = 0;
+	if (*start != NULL) {
+		len = (p - *start);
+		if (len > count)
+			len = count;
+	}
+
+	if (path_buf)
+		free_page((unsigned long) path_buf);
+
+	return len;
+}
+
+/*
+ * Register procfs read callback
+ */
+int vzquota_proc_init(void)
+{
+	struct proc_dir_entry *de;
+
+	de = create_proc_entry_mod("vz/vzquota", S_IFREG|S_IRUSR, NULL,
+			THIS_MODULE);
+	if (de == NULL) {
+		/* create "vz" subdirectory, if not exist */
+		de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+		if (de == NULL)
+			goto out_err;
+		de = create_proc_entry_mod("vzquota", S_IFREG|S_IRUSR, de,
+				THIS_MODULE);
+		if (de == NULL)
+			goto out_err;
+	}
+	de->read_proc = vzquota_read_proc;
+	de->data = NULL;
+	return 0;
+out_err:
+	return -EBUSY;
+}
+
+void vzquota_proc_release(void)
+{
+	/* Unregister procfs read callback */
+	remove_proc_entry("vz/vzquota", NULL);
+}
+
+#endif
diff -Nurap linux-2.6.9-100.orig/fs/vzdq_ops.c linux-2.6.9-ve023stab054/fs/vzdq_ops.c
--- linux-2.6.9-100.orig/fs/vzdq_ops.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/fs/vzdq_ops.c	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,624 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <asm/semaphore.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/vzquota.h>
+
+
+/* ----------------------------------------------------------------------
+ * Quota superblock operations - helper functions.
+ * --------------------------------------------------------------------- */
+
+static inline void vzquota_incr_inodes(struct dq_stat *dqstat,
+		unsigned long number)
+{
+	dqstat->icurrent += number;
+}
+
+static inline void vzquota_incr_space(struct dq_stat *dqstat,
+		__u64 number)
+{
+	dqstat->bcurrent += number;
+}
+
+static inline void vzquota_decr_inodes(struct dq_stat *dqstat,
+		unsigned long number)
+{
+	if (dqstat->icurrent > number)
+		dqstat->icurrent -= number;
+	else
+		dqstat->icurrent = 0;
+	if (dqstat->icurrent < dqstat->isoftlimit)
+		dqstat->itime = (time_t) 0;
+}
+
+static inline void vzquota_decr_space(struct dq_stat *dqstat,
+		__u64 number)
+{
+	if (dqstat->bcurrent > number)
+		dqstat->bcurrent -= number;
+	else
+		dqstat->bcurrent = 0;
+	if (dqstat->bcurrent < dqstat->bsoftlimit)
+		dqstat->btime = (time_t) 0;
+}
+
+/*
+ * better printk() message or use /proc/vzquotamsg interface
+ * similar to /proc/kmsg
+ */
+static inline void vzquota_warn(struct dq_info *dq_info, int dq_id, int flag,
+		const char *fmt)
+{
+	if (dq_info->flags & flag) /* warning already printed for this
+				       masterblock */
+		return;
+	printk(fmt, dq_id);
+	dq_info->flags |= flag;
+}
+
+/*
+ * ignore_hardlimit -
+ *
+ * Intended to allow superuser of VE0 to override hardlimits.
+ *
+ * ignore_hardlimit() has a very bad feature:
+ *
+ *	writepage() operation for writable mapping of a file with holes
+ *	may trigger get_block() with wrong current and as a consequence,
+ *	opens a possibility to overcommit hardlimits
+ */
+/* for the reason above, it is disabled now */
+static inline int ignore_hardlimit(struct dq_info *dqstat)
+{
+#if 0
+	return	ve_is_super(get_exec_env()) &&
+		capable(CAP_SYS_RESOURCE) &&
+		(dqstat->options & VZ_QUOTA_OPT_RSQUASH);
+#else
+	return 0;
+#endif
+}
+
+static int vzquota_check_inodes(struct dq_info *dq_info,
+		struct dq_stat *dqstat,
+		unsigned long number, int dq_id)
+{
+	if (number == 0)
+		return QUOTA_OK;
+
+	if (dqstat->icurrent + number > dqstat->ihardlimit &&
+	    !ignore_hardlimit(dq_info)) {
+		vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
+			   "VZ QUOTA: file hardlimit reached for id=%d\n");
+		return NO_QUOTA;
+	}
+
+	if (dqstat->icurrent + number > dqstat->isoftlimit) {
+		if (dqstat->itime == (time_t)0) {
+			vzquota_warn(dq_info, dq_id, 0,
+				"VZ QUOTA: file softlimit exceeded "
+				"for id=%d\n");
+			dqstat->itime = CURRENT_TIME_SECONDS + dq_info->iexpire;
+		} else if (CURRENT_TIME_SECONDS >= dqstat->itime &&
+			   !ignore_hardlimit(dq_info)) {
+			vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
+				"VZ QUOTA: file softlimit expired "
+				"for id=%d\n");
+			return NO_QUOTA;
+		}
+	}
+
+	return QUOTA_OK;
+}
+
+static int vzquota_check_space(struct dq_info *dq_info,
+		struct dq_stat *dqstat,
+		__u64 number, int dq_id, char prealloc)
+{
+	if (number == 0)
+		return QUOTA_OK;
+
+	if (dqstat->bcurrent + number > dqstat->bhardlimit &&
+	    !ignore_hardlimit(dq_info)) {
+		if (!prealloc)
+			vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
+				"VZ QUOTA: disk hardlimit reached "
+				"for id=%d\n");
+		return NO_QUOTA;
+	}
+
+	if (dqstat->bcurrent + number > dqstat->bsoftlimit) {
+		if (dqstat->btime == (time_t)0) {
+			if (!prealloc) {
+				vzquota_warn(dq_info, dq_id, 0,
+					"VZ QUOTA: disk softlimit exceeded "
+					"for id=%d\n");
+				dqstat->btime = CURRENT_TIME_SECONDS
+							+ dq_info->bexpire;
+			} else {
+				/*
+				 * Original Linux quota doesn't allow
+				 * preallocation to exceed softlimit so
+				 * exceeding will always be printed
+				 */
+				return NO_QUOTA;
+			}
+		} else if (CURRENT_TIME_SECONDS >= dqstat->btime &&
+			   !ignore_hardlimit(dq_info)) {
+			if (!prealloc)
+				vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
+					"VZ QUOTA: disk quota "
+					"softlimit expired "
+					"for id=%d\n");
+			return NO_QUOTA;
+		}
+	}
+
+	return QUOTA_OK;
+}
+
+static int vzquota_check_ugid_inodes(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid[],
+		int type, unsigned long number)
+{
+	struct dq_info *dqinfo;
+	struct dq_stat *dqstat;
+
+	if (qugid[type] == NULL)
+		return QUOTA_OK;
+	if (qugid[type] == VZ_QUOTA_UGBAD)
+		return NO_QUOTA;
+
+	if (type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA))
+		return QUOTA_OK;
+	if (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA))
+		return QUOTA_OK;
+	if (number == 0)
+		return QUOTA_OK;
+
+	dqinfo = &qmblk->dq_ugid_info[type];
+	dqstat = &qugid[type]->qugid_stat;
+
+	if (dqstat->ihardlimit != 0 &&
+	    dqstat->icurrent + number > dqstat->ihardlimit)
+		return NO_QUOTA;
+
+	if (dqstat->isoftlimit != 0 &&
+	    dqstat->icurrent + number > dqstat->isoftlimit) {
+		if (dqstat->itime == (time_t)0)
+			dqstat->itime = CURRENT_TIME_SECONDS + dqinfo->iexpire;
+		else if (CURRENT_TIME_SECONDS >= dqstat->itime)
+			return NO_QUOTA;
+	}
+
+	return QUOTA_OK;
+}
+
+static int vzquota_check_ugid_space(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid[],
+		int type, __u64 number, char prealloc)
+{
+	struct dq_info *dqinfo;
+	struct dq_stat *dqstat;
+	
+	if (qugid[type] == NULL)
+		return QUOTA_OK;
+	if (qugid[type] == VZ_QUOTA_UGBAD)
+		return NO_QUOTA;
+
+	if (type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA))
+		return QUOTA_OK;
+	if (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA))
+		return QUOTA_OK;
+	if (number == 0)
+		return QUOTA_OK;
+
+	dqinfo = &qmblk->dq_ugid_info[type];
+	dqstat = &qugid[type]->qugid_stat;
+
+	if (dqstat->bhardlimit != 0 &&
+	    dqstat->bcurrent + number > dqstat->bhardlimit)
+		return NO_QUOTA;
+
+	if (dqstat->bsoftlimit != 0 &&
+	    dqstat->bcurrent + number > dqstat->bsoftlimit) {
+		if (dqstat->btime == (time_t)0) {
+			if (!prealloc)
+				dqstat->btime = CURRENT_TIME_SECONDS
+							+ dqinfo->bexpire;
+			else
+				/*
+				 * Original Linux quota doesn't allow
+				 * preallocation to exceed softlimit so
+				 * exceeding will always be printed
+				 */
+				return NO_QUOTA;
+		} else if (CURRENT_TIME_SECONDS >= dqstat->btime)
+			return NO_QUOTA;
+	}
+
+	return QUOTA_OK;
+}
+
+/* ----------------------------------------------------------------------
+ * Quota superblock operations
+ * --------------------------------------------------------------------- */
+
+/*
+ * S_NOQUOTA note.
+ * In the current kernel (2.6.8.1), S_NOQUOTA flag is set only for
+ *  - quota file (absent in our case)
+ *  - after explicit DQUOT_DROP (earlier than clear_inode) in functions like
+ *    filesystem-specific new_inode, before the inode gets outside links.
+ * For the latter case, the only quota operation where care about S_NOQUOTA
+ * might be required is vzquota_drop, but there S_NOQUOTA has already been
+ * checked in DQUOT_DROP().
+ * So, S_NOQUOTA may be ignored for now in the VZDQ code.
+ *
+ * The above note is not entirely correct.
+ * Both for ext2 and ext3 filesystems, DQUOT_FREE_INODE is called from
+ * delete_inode if new_inode fails (for example, because of inode quota
+ * limits), so S_NOQUOTA check is needed in free_inode.
+ * This seems to be the dark corner of the current quota API.
+ */
+
+/*
+ * Initialize quota operations for the specified inode.
+ */
+static int vzquota_initialize(struct inode *inode, int type)
+{
+	vzquota_inode_init_call(inode);
+	return 0; /* ignored by caller */
+}
+
+/*
+ * Release quota for the specified inode.
+ */
+static int vzquota_drop(struct inode *inode)
+{
+	vzquota_inode_drop_call(inode);
+	return 0; /* ignored by caller */
+}
+
+/*
+ * Allocate block callback.
+ *
+ * If (prealloc) disk quota exceeding warning is not printed.
+ * See Linux quota to know why.
+ *
+ * Return:
+ *	QUOTA_OK == 0 on SUCCESS
+ *	NO_QUOTA == 1 if allocation should fail
+ */
+static int vzquota_alloc_space(struct inode *inode,
+			     qsize_t number, int prealloc)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+	int ret = QUOTA_OK;
+
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA;
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid[MAXQUOTAS];
+#endif
+
+		/* checking first */
+		ret = vzquota_check_space(&qmblk->dq_info, &qmblk->dq_stat,
+				number, qmblk->dq_id, prealloc);
+		if (ret == NO_QUOTA)
+			goto no_quota;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
+			ret = vzquota_check_ugid_space(qmblk, qugid,
+					cnt, number, prealloc);
+			if (ret == NO_QUOTA)
+				goto no_quota;
+		}
+		/* check ok, may increment */
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			if (qugid[cnt] == NULL)
+				continue;
+			vzquota_incr_space(&qugid[cnt]->qugid_stat, number);
+		}
+#endif
+		vzquota_incr_space(&qmblk->dq_stat, number);
+		vzquota_data_unlock(inode, &data);
+	}
+
+	inode_add_bytes(inode, number);
+	might_sleep();
+	return QUOTA_OK;
+
+no_quota:
+	vzquota_data_unlock(inode, &data);
+	return NO_QUOTA;
+}
+
+/*
+ * Allocate inodes callback.
+ *
+ * Return:
+ *	QUOTA_OK == 0 on SUCCESS
+ *	NO_QUOTA == 1 if allocation should fail
+ */
+static int vzquota_alloc_inode(const struct inode *inode, unsigned long number)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+	int ret = QUOTA_OK;
+
+	qmblk = vzquota_inode_data((struct inode *)inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA;
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid *qugid[MAXQUOTAS];
+#endif
+
+		/* checking first */
+		ret = vzquota_check_inodes(&qmblk->dq_info, &qmblk->dq_stat,
+				number, qmblk->dq_id);
+		if (ret == NO_QUOTA)
+			goto no_quota;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
+			ret = vzquota_check_ugid_inodes(qmblk, qugid,
+					cnt, number);
+			if (ret == NO_QUOTA)
+				goto no_quota;
+		}
+		/* check ok, may increment */
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			if (qugid[cnt] == NULL)
+				continue;
+			vzquota_incr_inodes(&qugid[cnt]->qugid_stat, number);
+		}
+#endif
+		vzquota_incr_inodes(&qmblk->dq_stat, number);
+		vzquota_data_unlock((struct inode *)inode, &data);
+	}
+
+	might_sleep();
+	return QUOTA_OK;
+
+no_quota:
+	vzquota_data_unlock((struct inode *)inode, &data);
+	return NO_QUOTA;
+}
+
+/*
+ * Free space callback.
+ */
+static int vzquota_free_space(struct inode *inode, qsize_t number)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA; /* isn't checked by the caller */
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid;
+#endif
+
+		vzquota_decr_space(&qmblk->dq_stat, number);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid = INODE_QLNK(inode)->qugid[cnt];
+			if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
+				continue;
+			vzquota_decr_space(&qugid->qugid_stat, number);
+		}
+#endif
+		vzquota_data_unlock(inode, &data);
+	}
+	inode_sub_bytes(inode, number);
+	might_sleep();
+	return QUOTA_OK;
+}
+
+/*
+ * Free inodes callback.
+ */
+static int vzquota_free_inode(const struct inode *inode, unsigned long number)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	qmblk = vzquota_inode_data((struct inode *)inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA;
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid;
+#endif
+
+		vzquota_decr_inodes(&qmblk->dq_stat, number);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid = INODE_QLNK(inode)->qugid[cnt];
+			if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
+				continue;
+			vzquota_decr_inodes(&qugid->qugid_stat, number);
+		}
+#endif
+		vzquota_data_unlock((struct inode *)inode, &data);
+	}
+	might_sleep();
+	return QUOTA_OK;
+}
+
+void vzquota_inode_off(struct inode * inode)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	/* The call is made through virtinfo, it can be an inode
+	 * not controlled by vzquota.
+	 */
+	if (inode->i_sb->dq_op != &vz_quota_operations)
+		return;
+
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return;
+
+	if (qmblk == NULL) {
+		/* Tricky place. If qmblk == NULL, it means that this inode
+		 * is not in area controlled by vzquota (except for rare
+		 * case of already set S_NOQUOTA). But we have to set
+		 * S_NOQUOTA in any case because vzquota can be turned
+		 * on later, when this inode is invalid from viewpoint
+		 * of vzquota.
+		 *
+		 * To be safe, we reacquire vzquota lock.
+		 */
+		inode_qmblk_lock(inode->i_sb);
+		inode->i_flags |= S_NOQUOTA;
+		inode_qmblk_unlock(inode->i_sb);
+		return;
+	} else {
+		loff_t bytes = inode_get_bytes(inode);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid;
+#endif
+
+		inode->i_flags |= S_NOQUOTA;
+
+		vzquota_decr_space(&qmblk->dq_stat, bytes);
+		vzquota_decr_inodes(&qmblk->dq_stat, 1);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid = INODE_QLNK(inode)->qugid[cnt];
+			if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
+				continue;
+			vzquota_decr_space(&qugid->qugid_stat, bytes);
+			vzquota_decr_inodes(&qugid->qugid_stat, 1);
+		}
+#endif
+
+		vzquota_data_unlock(inode, &data);
+
+		vzquota_inode_drop_call(inode);
+	}
+}
+
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+
+/*
+ * helper function for quota_transfer
+ * check that we can add inode to this quota_id
+ */
+static int vzquota_transfer_check(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid[],
+		unsigned int type, __u64 size)
+{
+	if (vzquota_check_ugid_space(qmblk, qugid, type, size, 0) != QUOTA_OK ||
+	    vzquota_check_ugid_inodes(qmblk, qugid, type, 1) != QUOTA_OK)
+		return -1;
+	return 0;
+}
+
+int vzquota_transfer_usage(struct inode *inode,
+		int mask,
+		struct vz_quota_ilink *qlnk)
+{
+	struct vz_quota_ugid *qugid_old;
+	__u64 space;
+	int i;
+
+	space = inode_get_bytes(inode);
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (!(mask & (1 << i)))
+			continue;
+		/*
+		 * Do not permit chown a file if its owner does not have
+		 * ugid record. This might happen if we somehow exceeded
+		 * the UID/GID (e.g. set uglimit less than number of users).
+		 */
+		if (INODE_QLNK(inode)->qugid[i] == VZ_QUOTA_UGBAD)
+			return -1;
+		if (vzquota_transfer_check(qlnk->qmblk, qlnk->qugid, i, space))
+			return -1;
+	}
+
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (!(mask & (1 << i)))
+			continue;
+		qugid_old = INODE_QLNK(inode)->qugid[i];
+		vzquota_decr_space(&qugid_old->qugid_stat, space);
+		vzquota_decr_inodes(&qugid_old->qugid_stat, 1);
+		vzquota_incr_space(&qlnk->qugid[i]->qugid_stat, space);
+		vzquota_incr_inodes(&qlnk->qugid[i]->qugid_stat, 1);
+	}
+	return 0;
+}
+
+/*
+ * Transfer the inode between different user/group quotas.
+ */
+static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
+{
+	return vzquota_inode_transfer_call(inode, iattr) ?
+		NO_QUOTA : QUOTA_OK;
+}
+
+#else /* CONFIG_VZ_QUOTA_UGID */
+
+static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
+{
+	return QUOTA_OK;
+}
+
+#endif
+
+/*
+ * Called under following semaphores:
+ *	old_d->d_inode->i_sb->s_vfs_rename_sem
+ *	old_d->d_inode->i_sem
+ *	new_d->d_inode->i_sem
+ * [not verified  --SAW]
+ */
+static int vzquota_rename(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir)
+{
+	return vzquota_rename_check(inode, old_dir, new_dir) ?
+		NO_QUOTA : QUOTA_OK;
+}
+
+/*
+ * Structure of superblock diskquota operations.
+ */
+struct dquot_operations vz_quota_operations = {
+	initialize:	vzquota_initialize,
+	drop:		vzquota_drop,
+	alloc_space:	vzquota_alloc_space,
+	alloc_inode:	vzquota_alloc_inode,
+	free_space:	vzquota_free_space,
+	free_inode:	vzquota_free_inode,
+	transfer:	vzquota_transfer,
+	rename:		vzquota_rename
+};
diff -Nurap linux-2.6.9-100.orig/fs/vzdq_tree.c linux-2.6.9-ve023stab054/fs/vzdq_tree.c
--- linux-2.6.9-100.orig/fs/vzdq_tree.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/fs/vzdq_tree.c	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,286 @@
+/*
+ *
+ * Copyright (C) 2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo quota tree implementation
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/vzdq_tree.h>
+
+struct quotatree_tree *quotatree_alloc(void) /* allocate and initialize an empty quota tree; NULL on OOM */
+{
+	int l;
+	struct quotatree_tree *tree;
+
+	tree = kmalloc(sizeof(struct quotatree_tree), GFP_KERNEL);
+	if (tree == NULL)
+		goto out;
+
+	for (l = 0; l < QUOTATREE_DEPTH; l++) {	/* empty used/free node lists on every level */
+		INIT_LIST_HEAD(&tree->levels[l].usedlh);
+		INIT_LIST_HEAD(&tree->levels[l].freelh);
+		tree->levels[l].freenum = 0;
+	}
+	tree->root = NULL;
+	tree->leaf_num = 0;
+out:
+	return tree;
+}
+
+static struct quotatree_node *
+quotatree_follow(struct quotatree_tree *tree, quotaid_t id, int level,
+		struct quotatree_find_state *st) /* walk towards @id down to @level; returns last node entered */
+{
+	void **block;
+	struct quotatree_node *parent;
+	int l, index;
+
+	parent = NULL;
+	block = (void **)&tree->root;
+	l = 0;
+	while (l < level && *block != NULL) {	/* descend while the path exists */
+		index = (id >>  QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK;	/* child slot for this level */
+		parent = *block;
+		block = parent->blocks + index;
+		l++;
+	}
+	if (st != NULL) {	/* record where the walk stopped (reused by insert) */
+		st->block = block;
+		st->level = l;
+	}
+
+	return parent;
+}
+
+void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st) /* lookup leaf data for @id; NULL on miss (@st then usable for insert) */
+{
+	quotatree_follow(tree, id, QUOTATREE_DEPTH, st);
+	if (st->level == QUOTATREE_DEPTH)	/* full path present => leaf slot reached */
+		return *st->block;
+	else
+		return NULL;
+}
+
+void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index) /* Nth occupied leaf, as if leafs formed an array; NULL if out of range */
+{
+	int i, count;
+	struct quotatree_node *p;
+	void *leaf;
+
+	if (QTREE_LEAFNUM(tree) <= index)
+		return NULL;
+
+	count = 0;
+	list_for_each_entry(p, &QTREE_LEAFLVL(tree)->usedlh, list) {	/* scan in-use leaf-level nodes */
+		for (i = 0; i < QUOTATREE_BSIZE; i++) {
+			leaf = p->blocks[i];
+			if (leaf == NULL)
+				continue;
+			if (count == index)	/* found the index-th occupied slot */
+				return leaf;
+			count++;
+		}
+	}
+	return NULL;
+}
+
+/* returns the data leaf (vz_quota_ugid) following an _existent_ ugid (@id)
+ * in the tree; NULL when @id has no leaf-level parent or is the last entry */
+void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id)
+{
+	int off;
+	struct quotatree_node *parent, *p;
+	struct list_head *lh;
+
+	/* get parent referring to the correct quota tree node of the last level */
+	parent = quotatree_follow(tree, id, QUOTATREE_DEPTH, NULL);
+	if (!parent)
+		return NULL;
+
+	off = (id & QUOTATREE_BMASK) + 1;	/* next ugid */
+	lh = &parent->list;
+	do {	/* finish this leaf node, then continue over the used-node list */
+		p = list_entry(lh, struct quotatree_node, list);
+		for ( ; off < QUOTATREE_BSIZE; off++)
+			if (p->blocks[off])
+				return p->blocks[off];
+		off = 0;
+		lh = lh->next;
+	} while (lh != &QTREE_LEAFLVL(tree)->usedlh);	/* stop after wrapping to the list head */
+
+	return NULL;
+}
+
+int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st, void *data) /* insert @data for @id; @st comes from a failed quotatree_find() */
+{
+	struct quotatree_node *p;
+	int l, index;
+
+	while (st->level < QUOTATREE_DEPTH) {	/* build the missing path down to leaf level */
+		l = st->level;
+		if (!list_empty(&tree->levels[l].freelh)) {	/* reuse a cached free node first */
+			p = list_entry(tree->levels[l].freelh.next,
+					struct quotatree_node, list);
+			list_del(&p->list);
+		} else {
+			p = kmalloc(sizeof(struct quotatree_node), GFP_NOFS | __GFP_NOFAIL);
+			if (p == NULL)	/* NOTE(review): dead check — __GFP_NOFAIL allocations do not return NULL */
+				return -ENOMEM;
+			/* save the block number within the l-level;
+			 * it is used for quota file generation */
+			p->num = tree->levels[l].freenum++;
+		}
+		list_add(&p->list, &tree->levels[l].usedlh);
+		memset(p->blocks, 0, sizeof(p->blocks));
+		*st->block = p;	/* link the new node into its parent slot */
+
+		index = (id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK;
+		st->block = p->blocks + index;
+		st->level++;
+	}
+	tree->leaf_num++;
+	*st->block = data;	/* store the payload in the leaf slot */
+
+	return 0;
+}
+
+static struct quotatree_node *
+quotatree_remove_ptr(struct quotatree_tree *tree, quotaid_t id,
+		int level) /* clear the slot for @id at @level; returns its parent node */
+{
+	struct quotatree_node *parent;
+	struct quotatree_find_state st;
+
+	parent = quotatree_follow(tree, id, level, &st);
+	if (st.level == QUOTATREE_DEPTH)	/* removing a leaf payload, not an inner node */
+		tree->leaf_num--;
+	*st.block = NULL;
+	return parent;
+}
+
+void quotatree_remove(struct quotatree_tree *tree, quotaid_t id) /* remove @id's leaf and prune now-empty inner nodes above it */
+{
+	struct quotatree_node *p;
+	int level, i;
+
+	p = quotatree_remove_ptr(tree, id, QUOTATREE_DEPTH);
+	for (level = QUOTATREE_DEPTH - 1; level >= QUOTATREE_CDEPTH; level--) {
+		for (i = 0; i < QUOTATREE_BSIZE; i++)
+			if (p->blocks[i] != NULL)	/* node still referenced => stop pruning */
+				return;
+		list_move(&p->list, &tree->levels[level].freelh);	/* cache the empty node for reuse */
+		p = quotatree_remove_ptr(tree, id, level);
+	}
+}
+
+#if 0
+static void quotatree_walk(struct quotatree_tree *tree,
+		struct quotatree_node *node_start,
+		quotaid_t id_start,
+		int level_start, int level_end,
+		int (*callback)(struct quotatree_tree *,
+				quotaid_t id,
+				int level,
+				void *ptr,
+				void *data),
+		void *data)
+{
+	struct quotatree_node *p;
+	int l, shift, index;
+	quotaid_t id;
+	struct quotatree_find_state st;
+
+	p = node_start;
+	l = level_start;
+	shift = (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
+	id = id_start;
+	index = 0;
+
+	/*
+	 * Invariants:
+	 * shift == (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
+	 * id & ((1 << shift) - 1) == 0
+	 * p is l-level node corresponding to id
+	 */
+	do {
+		if (!p)
+			break;
+
+		if (l < level_end) {
+			for (; index < QUOTATREE_BSIZE; index++)
+				if (p->blocks[index] != NULL)
+					break;
+			if (index < QUOTATREE_BSIZE) {
+				/* descend */
+				p = p->blocks[index];
+				l++;
+				shift -= QUOTAID_BBITS;
+				id += (quotaid_t)index << shift;
+				index = 0;
+				continue;
+			}
+		}
+
+		if ((*callback)(tree, id, l, p, data))
+			break;
+
+		/* ascend and to the next node */
+		p = quotatree_follow(tree, id, l, &st);
+
+		index = ((id >> shift) & QUOTATREE_BMASK) + 1;
+		l--;
+		shift += QUOTAID_BBITS;
+		id &= ~(((quotaid_t)1 << shift) - 1);
+	} while (l >= level_start);
+}
+#endif
+
+static void free_list(struct list_head *node_list) /* kfree() every quotatree_node linked on @node_list */
+{
+	struct quotatree_node *p, *tmp;
+
+	list_for_each_entry_safe(p, tmp, node_list, list) {	/* _safe: entries are unlinked while iterating */
+		list_del(&p->list);
+		kfree(p);
+	}
+}
+
+static inline void quotatree_free_nodes(struct quotatree_tree *tree) /* release used and cached-free nodes of every level */
+{
+	int i;
+
+	for (i = 0; i < QUOTATREE_DEPTH; i++) {
+		free_list(&tree->levels[i].usedlh);
+		free_list(&tree->levels[i].freelh);
+	}
+}
+
+static void quotatree_free_leafs(struct quotatree_tree *tree,
+		void (*dtor)(void *)) /* run @dtor on every leaf payload still present in the tree */
+{
+	int i;
+	struct quotatree_node *p;
+
+	list_for_each_entry(p, &QTREE_LEAFLVL(tree)->usedlh, list) {
+		for (i = 0; i < QUOTATREE_BSIZE; i++) {
+			if (p->blocks[i] == NULL)
+				continue;
+
+			dtor(p->blocks[i]);
+		}
+	}
+}
+
+void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *)) /* destroy: payloads via @dtor, then nodes, then the tree itself */
+{
+	quotatree_free_leafs(tree, dtor);
+	quotatree_free_nodes(tree);
+	kfree(tree);
+}
diff -Nurap linux-2.6.9-100.orig/fs/vzdq_ugid.c linux-2.6.9-ve023stab054/fs/vzdq_ugid.c
--- linux-2.6.9-100.orig/fs/vzdq_ugid.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/fs/vzdq_ugid.c	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,1179 @@
+/*
+ * Copyright (C) 2002 SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo UID/GID disk quota implementation
+ */
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/smp_lock.h>
+#include <linux/rcupdate.h>
+#include <asm/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/quota.h>
+#include <linux/quotaio_v2.h>
+#include <linux/virtinfo.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/namespace.h>
+
+#include <linux/vzctl.h>
+#include <linux/vzctl_quota.h>
+#include <linux/vzquota.h>
+
+/*
+ * XXX
+ * may be something is needed for sb->s_dquot->info[]?
+ */
+
+#define USRQUOTA_MASK		(1 << USRQUOTA)
+#define GRPQUOTA_MASK		(1 << GRPQUOTA)
+#define QTYPE2MASK(type)	(1 << (type))
+
+static kmem_cache_t *vz_quota_ugid_cachep;
+
+/* guard to protect vz_quota_master from destroy in quota_on/off. Also protects
+ * list on the hash table */
+extern struct semaphore vz_quota_sem;
+
+inline struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid) /* take a reference; the VZ_QUOTA_UGBAD sentinel passes through unrefed */
+{
+	if (qugid != VZ_QUOTA_UGBAD)
+		atomic_inc(&qugid->qugid_count);
+	return qugid;
+}
+
+/* we don't limit users with zero limits */
+static inline int vzquota_fake_stat(struct dq_stat *stat) /* true iff all four block/inode limits are zero */
+{
+	return stat->bhardlimit == 0 && stat->bsoftlimit == 0 &&
+		stat->ihardlimit == 0 && stat->isoftlimit == 0;
+}
+
+/* callback function for quotatree_free() */
+static inline void vzquota_free_qugid(void *ptr) /* return the record to its slab cache */
+{
+	kmem_cache_free(vz_quota_ugid_cachep, ptr);
+}
+
+/*
+ * drop a reference; destroys the ugid if it has zero refcount, limits
+ * and usage; must be called under qmblk->dq_sem
+ */
+void vzquota_put_ugid(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid)
+{
+	if (qugid == VZ_QUOTA_UGBAD)
+		return;
+	qmblk_data_read_lock(qmblk);
+	if (atomic_dec_and_test(&qugid->qugid_count) &&
+	    (qmblk->dq_flags & VZDQUG_FIXED_SET) == 0 &&	/* fixed-set quotas keep all records */
+	    vzquota_fake_stat(&qugid->qugid_stat) &&	/* no limits configured */
+	    qugid->qugid_stat.bcurrent == 0 &&
+	    qugid->qugid_stat.icurrent == 0) {	/* and no usage accounted */
+		quotatree_remove(QUGID_TREE(qmblk, qugid->qugid_type),
+				qugid->qugid_id);
+		qmblk->dq_ugid_count--;
+		vzquota_free_qugid(qugid);
+	}
+	qmblk_data_read_unlock(qmblk);
+}
+
+/*
+ * Get an ugid block by its index, as if the records formed an array.
+ * In reality this is not an array - it is the leaf chain of the tree.
+ * NULL if index is out of range.
+ * qmblk semaphore is required to protect the tree.
+ */
+static inline struct vz_quota_ugid *
+vzquota_get_byindex(struct vz_quota_master *qmblk, unsigned int index, int type)
+{
+	return quotatree_leaf_byindex(QUGID_TREE(qmblk, type), index);
+}
+
+/*
+ * get the next element from the ugid "virtual array";
+ * ugid must be in the current array, and this array may not be changed
+ * between two accesses (guaranteed by the "stopped" quota state and the
+ * quota semaphore); qmblk semaphore is required to protect the tree
+ */
+static inline struct vz_quota_ugid *
+vzquota_get_next(struct vz_quota_master *qmblk, struct vz_quota_ugid *qugid)
+{
+	return quotatree_get_next(QUGID_TREE(qmblk, qugid->qugid_type),
+			qugid->qugid_id);
+}
+
+/*
+ * find or create the ugid record for (@quota_id, @type); requires dq_sem held
+ */
+struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
+			unsigned int quota_id, int type, int flags)
+{
+	struct vz_quota_ugid *qugid;
+	struct quotatree_tree *tree;
+	struct quotatree_find_state st;
+
+	tree = QUGID_TREE(qmblk, type);
+	qugid = quotatree_find(tree, quota_id, &st);	/* @st records the insertion point on miss */
+	if (qugid)
+		goto success;
+
+	/* caller does not want alloc */
+	if (flags & VZDQUG_FIND_DONT_ALLOC)
+		goto fail;
+
+	if (flags & VZDQUG_FIND_FAKE)	/* fake lookup bypasses limit and fixed-set checks */
+		goto doit;
+
+	/* check limit */
+	if (qmblk->dq_ugid_count >= qmblk->dq_ugid_max)
+		goto fail;
+
+	/* see comment at VZDQUG_FIXED_SET define */
+	if (qmblk->dq_flags & VZDQUG_FIXED_SET)
+		goto fail;
+
+doit:
+	/* alloc new structure */
+	qugid = kmem_cache_alloc(vz_quota_ugid_cachep,
+			SLAB_NOFS | __GFP_NOFAIL);
+	if (qugid == NULL)	/* NOTE(review): dead check — __GFP_NOFAIL allocations do not return NULL */
+		goto fail;
+
+	/* initialize new structure */
+	qugid->qugid_id = quota_id;
+	memset(&qugid->qugid_stat, 0, sizeof(qugid->qugid_stat));
+	qugid->qugid_type = type;
+	atomic_set(&qugid->qugid_count, 0);	/* reference is taken below at success: */
+
+	/* insert in tree */
+	if (quotatree_insert(tree, quota_id, &st, qugid) < 0)
+		goto fail_insert;
+	qmblk->dq_ugid_count++;
+
+success:
+	vzquota_get_ugid(qugid);
+	return qugid;
+
+fail_insert:
+	vzquota_free_qugid(qugid);
+fail:
+	return VZ_QUOTA_UGBAD;
+}
+
+/*
+ * locking wrapper for __vzquota_find_ugid(): takes dq_sem, may schedule
+ */
+struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
+			unsigned int quota_id, int type, int flags)
+{
+	struct vz_quota_ugid *qugid;
+
+	down(&qmblk->dq_sem);
+	qugid = __vzquota_find_ugid(qmblk, quota_id, type, flags);
+	up(&qmblk->dq_sem);
+
+	return qugid;
+}
+
+/*
+ * destroy all ugid records on the given quota master
+ */
+void vzquota_kill_ugid(struct vz_quota_master *qmblk)
+{
+	BUG_ON((qmblk->dq_gid_tree == NULL && qmblk->dq_uid_tree != NULL) ||
+		(qmblk->dq_uid_tree == NULL && qmblk->dq_gid_tree != NULL));	/* trees exist in pairs */
+
+	if (qmblk->dq_uid_tree != NULL) {
+		quotatree_free(qmblk->dq_uid_tree, vzquota_free_qugid);
+		quotatree_free(qmblk->dq_gid_tree, vzquota_free_qugid);
+	}
+}
+
+
+/* ----------------------------------------------------------------------
+ * Management interface to ugid quota for (super)users.
+ * --------------------------------------------------------------------- */
+
+/**
+ * vzquota_find_qmblk - helper to emulate quota on virtual filesystems
+ *
+ * This function finds a quota master block corresponding to the root of
+ * a virtual filesystem.
+ * Returns a quota master block with reference taken, or %NULL if not under
+ * quota, or %VZ_QUOTA_BAD if quota inconsistency is found (and all allocation
+ * operations will fail).
+ *
+ * Note: this function uses vzquota_inode_qmblk().
+ * The latter is a rather confusing function: it returns qmblk that used to be
+ * on the inode some time ago (without guarantee that it still has any
+ * relations to the inode).  So, vzquota_find_qmblk() leaves it up to the
+ * caller to think whether the inode could have changed its qmblk and what to
+ * do in that case.
+ * Currently, the callers appear to not care :(
+ */
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *sb)
+{
+	struct inode *qrinode;
+	struct vz_quota_master *qmblk;
+
+	qmblk = NULL;
+	qrinode = NULL;
+	if (sb->s_op->get_quota_root != NULL)	/* hook provided only by virtual filesystems */
+		qrinode = sb->s_op->get_quota_root(sb);
+	if (qrinode != NULL)
+		qmblk = vzquota_inode_qmblk(qrinode);	/* may be stale — see the comment above this function */
+	return qmblk;
+}
+
+static int vzquota_initialize2(struct inode *inode, int type) /* no-op: real accounting lives on the underlying sb */
+{
+	return QUOTA_OK;
+}
+
+static int vzquota_drop2(struct inode *inode) /* no-op */
+{
+	return QUOTA_OK;
+}
+
+static int vzquota_alloc_space2(struct inode *inode,
+			     qsize_t number, int prealloc) /* only maintain the inode byte count; never rejects */
+{
+	inode_add_bytes(inode, number);
+	return QUOTA_OK;
+}
+
+static int vzquota_alloc_inode2(const struct inode *inode, unsigned long number) /* no-op */
+{
+	return QUOTA_OK;
+}
+
+static int vzquota_free_space2(struct inode *inode, qsize_t number) /* mirror of vzquota_alloc_space2() */
+{
+	inode_sub_bytes(inode, number);
+	return QUOTA_OK;
+}
+
+static int vzquota_free_inode2(const struct inode *inode, unsigned long number) /* no-op */
+{
+	return QUOTA_OK;
+}
+
+static int vzquota_transfer2(struct inode *inode, struct iattr *iattr) /* no-op */
+{
+	return QUOTA_OK;
+}
+
+struct dquot_operations vz_quota_operations2 = {	/* pass-through ops installed on virtual superblocks */
+	initialize:	vzquota_initialize2,
+	drop:		vzquota_drop2,
+	alloc_space:	vzquota_alloc_space2,
+	alloc_inode:	vzquota_alloc_inode2,
+	free_space:	vzquota_free_space2,
+	free_inode:	vzquota_free_inode2,
+	transfer:	vzquota_transfer2
+};
+
+
+asmlinkage long sys_unlink(const char __user * pathname);
+asmlinkage long sys_rename(const char __user * oldname,
+	       const char __user * newname);
+asmlinkage long sys_symlink(const char __user * oldname,
+	       const char __user * newname);
+static int vz_restore_symlink(struct super_block *sb, char *path, int type) /* (re)point the aquota.* file at /proc/vz/vzaquota via tmp+rename */
+{
+	mm_segment_t oldfs;
+	char *newpath;
+	char dest[64];
+	const char *names[] = {
+		[USRQUOTA] "aquota.user",
+		[GRPQUOTA] "aquota.group"
+	};
+	int err;
+
+	newpath = kmalloc(strlen(path) + sizeof(".new"), GFP_KERNEL);	/* sizeof includes the NUL */
+	if (newpath == NULL)
+		return -ENOMEM;
+
+	strcpy(newpath, path);
+	strcat(newpath, ".new");
+
+	sprintf(dest, "/proc/vz/vzaquota/%08x/%s",
+			new_encode_dev(sb->s_dev), names[type]);
+
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);	/* the syscalls below take user pointers; we pass kernel buffers */
+	err = sys_unlink(newpath);
+	if (err < 0 && err != -ENOENT)	/* a missing temp link is fine */
+		goto out_restore;
+	err = sys_symlink(dest, newpath);
+	if (err < 0)
+		goto out_restore;
+	err = sys_rename(newpath, path);	/* atomically replace the final name */
+out_restore:
+	set_fs(oldfs);
+	kfree(newpath);
+	return err;
+}
+
+static int vz_quota_on(struct super_block *sb, int type,
+		int format_id, char *path) /* quotactl Q_QUOTAON: mark ugid quota of @type active on @sb */
+{
+	struct vz_quota_master *qmblk;
+	int mask, mask2;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	if (qmblk == NULL)	/* not under VZ quota */
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)	/* inconsistent quota state */
+		goto out;
+
+	err = vz_restore_symlink(sb, path, type);	/* make the quota file point into /proc/vz/vzaquota */
+	if (err < 0)
+		goto out;
+
+	mask = 0;
+	mask2 = 0;
+	sb->dq_op = &vz_quota_operations2;
+	sb->s_qcop = &vz_quotactl_operations;
+	if (type == USRQUOTA) {
+		mask = DQUOT_USR_ENABLED;
+		mask2 = VZDQ_USRQUOTA;
+	}
+	if (type == GRPQUOTA) {
+		mask = DQUOT_GRP_ENABLED;
+		mask2 = VZDQ_GRPQUOTA;
+	}
+	err = -EBUSY;
+	if (qmblk->dq_flags & mask2)	/* already on for this type */
+		goto out;
+
+	err = 0;
+	qmblk->dq_flags |= mask2;
+	sb->s_dquot.flags |= mask;
+
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);	/* drop the ref taken by vzquota_find_qmblk() */
+	return err;
+}
+
+static int vz_quota_off(struct super_block *sb, int type) /* quotactl Q_QUOTAOFF: clear the per-type active flag */
+{
+	struct vz_quota_master *qmblk;
+	int mask2;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	mask2 = 0;
+	if (type == USRQUOTA)
+		mask2 = VZDQ_USRQUOTA;
+	if (type == GRPQUOTA)
+		mask2 = VZDQ_GRPQUOTA;
+	err = -EINVAL;
+	if (!(qmblk->dq_flags & mask2))	/* this type is not currently on */
+		goto out;
+
+	qmblk->dq_flags &= ~mask2;
+	err = 0;
+
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);	/* drop the ref taken by vzquota_find_qmblk() */
+	return err;
+}
+
+static int vz_quota_sync(struct super_block *sb, int type) /* nothing to flush: accounting is maintained in memory */
+{
+	return 0;	/* vz quota is always uptodate */
+}
+
+static int vz_get_dqblk(struct super_block *sb, int type,
+		qid_t id, struct if_dqblk *di) /* quotactl Q_GETQUOTA: fill @di for one uid/gid */
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid *ugid;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	err = 0;
+	ugid = vzquota_find_ugid(qmblk, id, type, VZDQUG_FIND_DONT_ALLOC);	/* lookup only, never create */
+	if (ugid != VZ_QUOTA_UGBAD) {
+		qmblk_data_read_lock(qmblk);	/* snapshot the stats consistently */
+		di->dqb_bhardlimit = ugid->qugid_stat.bhardlimit >> 10;	/* bytes -> 1K blocks */
+		di->dqb_bsoftlimit = ugid->qugid_stat.bsoftlimit >> 10;
+		di->dqb_curspace = ugid->qugid_stat.bcurrent;
+		di->dqb_ihardlimit = ugid->qugid_stat.ihardlimit;
+		di->dqb_isoftlimit = ugid->qugid_stat.isoftlimit;
+		di->dqb_curinodes = ugid->qugid_stat.icurrent;
+		di->dqb_btime = ugid->qugid_stat.btime;
+		di->dqb_itime = ugid->qugid_stat.itime;
+		qmblk_data_read_unlock(qmblk);
+		di->dqb_valid = QIF_ALL;
+		vzquota_put_ugid(qmblk, ugid);
+	} else {
+		memset(di, 0, sizeof(*di));	/* unknown id reports zeroes, not an error */
+		di->dqb_valid = QIF_ALL;
+	}
+
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+/* must be called under vz_quota_sem */
+static int __vz_set_dqblk(struct vz_quota_master *qmblk,
+		int type, qid_t id, struct if_dqblk *di) /* apply limits/grace from @di; allocates the ugid record if absent */
+{
+	struct vz_quota_ugid *ugid;
+
+	ugid = vzquota_find_ugid(qmblk, id, type, 0);
+	if (ugid == VZ_QUOTA_UGBAD)
+		return -ESRCH;
+
+	qmblk_data_write_lock(qmblk);
+	/*
+	 * Subtle compatibility breakage.
+	 *
+	 * Some old non-vz kernel quota didn't start grace period
+	 * if the new soft limit happens to be below the usage.
+	 * Non-vz kernel quota in 2.4.20 starts the grace period
+	 * (if it hasn't been started).
+	 * Current non-vz kernel performs even more complicated
+	 * manipulations...
+	 *
+	 * Also, current non-vz kernels have inconsistency related to 
+	 * the grace time start.  In regular operations the grace period
+	 * is started if the usage is greater than the soft limit (and,
+	 * strangely, is cancelled if the usage is less).
+	 * However, set_dqblk starts the grace period if the usage is greater
+	 * or equal to the soft limit.
+	 *
+	 * Here we try to mimic the behavior of the current non-vz kernel.
+	 */
+	if (di->dqb_valid & QIF_BLIMITS) {
+		ugid->qugid_stat.bhardlimit =
+			(__u64)di->dqb_bhardlimit << 10;	/* 1K blocks -> bytes */
+		ugid->qugid_stat.bsoftlimit =
+			(__u64)di->dqb_bsoftlimit << 10;
+		if (di->dqb_bsoftlimit == 0 ||
+		    ugid->qugid_stat.bcurrent < ugid->qugid_stat.bsoftlimit)
+			ugid->qugid_stat.btime = 0;	/* under the limit: no grace period */
+		else if (!(di->dqb_valid & QIF_BTIME))
+			ugid->qugid_stat.btime = CURRENT_TIME_SECONDS
+				+ qmblk->dq_ugid_info[type].bexpire;	/* start the grace period now */
+		else
+			ugid->qugid_stat.btime = di->dqb_btime;
+	}
+	if (di->dqb_valid & QIF_ILIMITS) {	/* inode limits: same logic as blocks above */
+		ugid->qugid_stat.ihardlimit = di->dqb_ihardlimit;
+		ugid->qugid_stat.isoftlimit = di->dqb_isoftlimit;
+		if (di->dqb_isoftlimit == 0 ||
+		    ugid->qugid_stat.icurrent < ugid->qugid_stat.isoftlimit)
+			ugid->qugid_stat.itime = 0;
+		else if (!(di->dqb_valid & QIF_ITIME))
+			ugid->qugid_stat.itime = CURRENT_TIME_SECONDS
+				+ qmblk->dq_ugid_info[type].iexpire;
+		else
+			ugid->qugid_stat.itime = di->dqb_itime;
+	}
+	qmblk_data_write_unlock(qmblk);
+	vzquota_put_ugid(qmblk, ugid);
+
+	return 0;
+}
+
+static int vz_set_dqblk(struct super_block *sb, int type,
+		qid_t id, struct if_dqblk *di) /* quotactl Q_SETQUOTA: locked wrapper around __vz_set_dqblk() */
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+	err = __vz_set_dqblk(qmblk, type, id, di);
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+static int vz_get_dqinfo(struct super_block *sb, int type,
+		struct if_dqinfo *ii) /* quotactl Q_GETINFO: report grace periods for @type */
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	err = 0;
+	ii->dqi_bgrace = qmblk->dq_ugid_info[type].bexpire;
+	ii->dqi_igrace = qmblk->dq_ugid_info[type].iexpire;
+	ii->dqi_flags = 0;	/* no DQF_* flags are supported */
+	ii->dqi_valid = IIF_ALL;
+
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+/* must be called under vz_quota_sem */
+static int __vz_set_dqinfo(struct vz_quota_master *qmblk,
+		int type, struct if_dqinfo *ii) /* update grace periods from @ii, honoring its dqi_valid bits */
+{
+	if (ii->dqi_valid & IIF_FLAGS)
+		if (ii->dqi_flags & DQF_MASK)	/* no flags are supported here */
+			return -EINVAL;
+
+	if (ii->dqi_valid & IIF_BGRACE)
+		qmblk->dq_ugid_info[type].bexpire = ii->dqi_bgrace;
+	if (ii->dqi_valid & IIF_IGRACE)
+		qmblk->dq_ugid_info[type].iexpire = ii->dqi_igrace;
+	return 0;
+}
+
+static int vz_set_dqinfo(struct super_block *sb, int type,
+		struct if_dqinfo *ii) /* quotactl Q_SETINFO: locked wrapper around __vz_set_dqinfo() */
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+	err = __vz_set_dqinfo(qmblk, type, ii);
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+#ifdef CONFIG_QUOTA_COMPAT
+
+#define Q_GETQUOTI_SIZE 1024
+
+#define UGID2DQBLK(dst, src)						\
+	do {								\
+		(dst)->dqb_ihardlimit = (src)->qugid_stat.ihardlimit;	\
+		(dst)->dqb_isoftlimit = (src)->qugid_stat.isoftlimit;	\
+		(dst)->dqb_curinodes = (src)->qugid_stat.icurrent;	\
+		/* in 1K blocks */					\
+		(dst)->dqb_bhardlimit = (src)->qugid_stat.bhardlimit >> 10; \
+		/* in 1K blocks */					\
+		(dst)->dqb_bsoftlimit = (src)->qugid_stat.bsoftlimit >> 10; \
+		/* in bytes, 64 bit */					\
+		(dst)->dqb_curspace = (src)->qugid_stat.bcurrent;	\
+		(dst)->dqb_btime = (src)->qugid_stat.btime;		\
+		(dst)->dqb_itime = (src)->qugid_stat.itime;		\
+	} while (0)
+
+static int vz_get_quoti(struct super_block *sb, int type, qid_t idx,
+		struct v2_disk_dqblk *dqblk) /* compat Q_GETQUOTI: bulk-dump up to Q_GETQUOTI_SIZE records from @idx */
+{
+	struct vz_quota_master *qmblk;
+	struct v2_disk_dqblk *data, *kbuf;
+	struct vz_quota_ugid *ugid;
+	int count;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	err = -ENOMEM;
+	kbuf = vmalloc(Q_GETQUOTI_SIZE * sizeof(*kbuf));	/* staging buffer; copy_to_user is done unlocked */
+	if (!kbuf)
+		goto out;
+
+	down(&vz_quota_sem);
+	down(&qmblk->dq_sem);
+	for (ugid = vzquota_get_byindex(qmblk, idx, type), count = 0;
+		ugid != NULL && count < Q_GETQUOTI_SIZE;
+		count++)
+	{
+		data = kbuf + count;
+		qmblk_data_read_lock(qmblk);
+		UGID2DQBLK(data, ugid);
+		qmblk_data_read_unlock(qmblk);
+		data->dqb_id = ugid->qugid_id;
+
+		/* Find next entry */
+		ugid = vzquota_get_next(qmblk, ugid);
+		BUG_ON(ugid != NULL && ugid->qugid_type != type);	/* trees are per-type; a cross-type walk is a bug */
+	}
+	up(&qmblk->dq_sem);
+	up(&vz_quota_sem);
+
+	err = count;	/* success: number of records staged */
+	if (copy_to_user(dqblk, kbuf, count * sizeof(*kbuf)))
+		err = -EFAULT;
+
+	vfree(kbuf);
+out:
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+
+	return err;
+}
+
+#endif
+
+struct quotactl_ops vz_quotactl_operations = {	/* quotactl entry points for VZ ugid quota */
+	quota_on:	vz_quota_on,
+	quota_off:	vz_quota_off,
+	quota_sync:	vz_quota_sync,
+	get_info:	vz_get_dqinfo,
+	set_info:	vz_set_dqinfo,
+	get_dqblk:	vz_get_dqblk,
+	set_dqblk:	vz_set_dqblk,
+#ifdef CONFIG_QUOTA_COMPAT
+	get_quoti:	vz_get_quoti
+#endif
+};
+
+
+/* ----------------------------------------------------------------------
+ * Management interface for host system admins.
+ * --------------------------------------------------------------------- */
+
+static int quota_ugid_addstat(unsigned int quota_id, unsigned int ugid_size,
+		struct vz_quota_iface *u_ugid_buf) /* preload ugid usage/limits from userspace; quota must be starting */
+{
+	struct vz_quota_master *qmblk;
+	int ret;
+
+	down(&vz_quota_sem);
+
+	ret = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	ret = -EBUSY;
+	if (qmblk->dq_state != VZDQ_STARTING)
+		goto out; /* working quota doesn't accept new ugids */
+
+	ret = 0;
+	/* add ugids; ret doubles as the loop counter, so the return value
+	 * is the number of entries consumed */
+	for (ret = 0; ret < ugid_size; ret++) {
+		struct vz_quota_iface ugid_buf;
+		struct vz_quota_ugid *ugid;
+
+		if (copy_from_user(&ugid_buf, u_ugid_buf, sizeof(ugid_buf)))
+			break;
+
+		if (ugid_buf.qi_type >= MAXQUOTAS)
+			break; /* bad quota type - this is the only check */
+
+		ugid = vzquota_find_ugid(qmblk,
+				ugid_buf.qi_id, ugid_buf.qi_type, 0);
+		if (ugid == VZ_QUOTA_UGBAD) {
+			qmblk->dq_flags |= VZDQUG_FIXED_SET;	/* lock the ugid set once the limit is hit */
+			break; /* limit reached */
+		}
+
+		/* update usage/limits;
+		 * we can copy the data without the lock, because the data
+		 * cannot be modified in VZDQ_STARTING state */
+		ugid->qugid_stat = ugid_buf.qi_stat;
+
+		vzquota_put_ugid(qmblk, ugid);
+
+		u_ugid_buf++; /* next user buffer */
+	}
+out:
+	up(&vz_quota_sem);
+
+	return ret;
+}
+
+static int quota_ugid_setgrace(unsigned int quota_id,
+		struct dq_info u_dq_info[]) /* VZ_DQ_UGID_SETGRACE: set grace periods; only while starting */
+{
+	struct vz_quota_master *qmblk;
+	struct dq_info dq_info[MAXQUOTAS];
+	struct dq_info *target;
+	int err, type;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EBUSY;
+	if (qmblk->dq_state != VZDQ_STARTING)
+		goto out; /* working quota doesn't accept changing options */
+
+	err = -EFAULT;
+	if (copy_from_user(dq_info, u_dq_info, sizeof(dq_info)))
+		goto out;
+
+	err = 0;
+
+	/* update in qmblk */
+	for (type = 0; type < MAXQUOTAS; type ++) {
+		target = &qmblk->dq_ugid_info[type];
+		target->bexpire = dq_info[type].bexpire;
+		target->iexpire = dq_info[type].iexpire;
+	}
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+static int do_quota_ugid_getstat(struct vz_quota_master *qmblk, int index, int size,
+		struct vz_quota_iface *u_ugid_buf) /* fill up to @size records starting at @index; returns the count */
+{
+	int type, count;
+	struct vz_quota_ugid *ugid;
+
+	if (QTREE_LEAFNUM(qmblk->dq_uid_tree) +
+	    QTREE_LEAFNUM(qmblk->dq_gid_tree)
+	    		<= index)	/* index past both trees: nothing to report */
+		return 0;
+
+	count = 0;
+
+	type = index < QTREE_LEAFNUM(qmblk->dq_uid_tree) ? USRQUOTA : GRPQUOTA;
+	if (type == GRPQUOTA)
+		index -= QTREE_LEAFNUM(qmblk->dq_uid_tree);	/* rebase the index into the gid tree */
+
+	/* loop through ugid and then qgid quota */
+repeat:
+	for (ugid = vzquota_get_byindex(qmblk, index, type);
+		ugid != NULL && count < size;
+		ugid = vzquota_get_next(qmblk, ugid), count++)
+	{
+		struct vz_quota_iface ugid_buf;
+
+		/* form interface buffer and send in to user-level */
+		qmblk_data_read_lock(qmblk);
+		memcpy(&ugid_buf.qi_stat, &ugid->qugid_stat,
+				sizeof(ugid_buf.qi_stat));
+		qmblk_data_read_unlock(qmblk);
+		ugid_buf.qi_id = ugid->qugid_id;
+		ugid_buf.qi_type = ugid->qugid_type;
+
+		memcpy(u_ugid_buf, &ugid_buf, sizeof(ugid_buf));	/* despite the name, @u_ugid_buf is a kernel staging buffer here */
+		u_ugid_buf++; /* next portion of user buffer */
+	}
+
+	if (type == USRQUOTA && count < size) {	/* uid tree exhausted: continue with gids */
+		type = GRPQUOTA;
+		index = 0;
+		goto repeat;
+	}
+
+	return count;
+}
+
+static int quota_ugid_getstat(unsigned int quota_id,
+		int index, int size, struct vz_quota_iface *u_ugid_buf) /* VZ_DQ_UGID_GETSTAT: dump ugid records to userspace */
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_iface *k_ugid_buf;
+	int err;
+
+	if (index < 0 || size < 0)
+		return -EINVAL;
+
+	if (size > INT_MAX / sizeof(struct vz_quota_iface))	/* prevent multiplication overflow below */
+		return -EINVAL;
+
+	k_ugid_buf = vmalloc(size * sizeof(struct vz_quota_iface));
+	if (k_ugid_buf == NULL)
+		return -ENOMEM;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	down(&qmblk->dq_sem);
+	err = do_quota_ugid_getstat(qmblk, index, size, k_ugid_buf);	/* err >= 0: number of records filled */
+	up(&qmblk->dq_sem);
+	if (err < 0)
+		goto out;
+
+	if (copy_to_user(u_ugid_buf, k_ugid_buf,	/* NOTE(review): copies the full @size even when fewer records were filled — the tail is uninitialized vmalloc memory leaked to userspace */
+				size * sizeof(struct vz_quota_iface)))
+		err = -EFAULT;
+
+out:
+	up(&vz_quota_sem);
+	vfree(k_ugid_buf);
+	return err;
+}
+
+static int quota_ugid_getgrace(unsigned int quota_id,
+		struct dq_info u_dq_info[]) /* VZ_DQ_UGID_GETGRACE: copy grace periods out to userspace */
+{
+	struct vz_quota_master *qmblk;
+	struct dq_info dq_info[MAXQUOTAS];
+	struct dq_info *target;
+	int err, type;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = 0;
+	/* snapshot from qmblk under vz_quota_sem */
+	for (type = 0; type < MAXQUOTAS; type ++) {
+		target = &qmblk->dq_ugid_info[type];
+		dq_info[type].bexpire = target->bexpire;
+		dq_info[type].iexpire = target->iexpire;
+		dq_info[type].flags = target->flags;
+	}
+
+	if (copy_to_user(u_dq_info, dq_info, sizeof(dq_info)))
+		err = -EFAULT;
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+static int quota_ugid_getconfig(unsigned int quota_id,
+		struct vz_quota_ugid_stat *info) /* VZ_DQ_UGID_GETCONFIG: report the ugid limit/count/flags */
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_stat kinfo;
+	int err;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = 0;
+	kinfo.limit = qmblk->dq_ugid_max;
+	kinfo.count = qmblk->dq_ugid_count;
+	kinfo.flags = qmblk->dq_flags;
+
+	if (copy_to_user(info, &kinfo, sizeof(kinfo)))
+		err = -EFAULT;
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+static int quota_ugid_setconfig(unsigned int quota_id,
+		struct vz_quota_ugid_stat *info) /* VZ_DQ_UGID_SETCONFIG: set the ugid limit; flags only while starting */
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_stat kinfo;
+	int err;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (copy_from_user(&kinfo, info, sizeof(kinfo)))
+		goto out;
+
+	err = 0;
+	qmblk->dq_ugid_max = kinfo.limit;
+	if (qmblk->dq_state == VZDQ_STARTING) {	/* flags may change only before the quota starts running */
+		qmblk->dq_flags = kinfo.flags;
+		if (qmblk->dq_flags & VZDQUG_ON)
+			qmblk->dq_flags |= VZDQ_USRQUOTA | VZDQ_GRPQUOTA;	/* ugid quota implies both types */
+	}
+
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+static int quota_ugid_setlimit(unsigned int quota_id,
+		struct vz_quota_ugid_setlimit *u_lim) /* VZ_DQ_UGID_SETLIMIT: host-side equivalent of Q_SETQUOTA */
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_setlimit lim;
+	int err;
+
+	down(&vz_quota_sem);
+
+	err = -ESRCH;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (copy_from_user(&lim, u_lim, sizeof(lim)))
+		goto out;
+
+	err = __vz_set_dqblk(qmblk, lim.type, lim.id, &lim.dqb);
+
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+static int quota_ugid_setinfo(unsigned int quota_id,
+		struct vz_quota_ugid_setinfo *u_info) /* VZ_DQ_UGID_SETINFO: host-side equivalent of Q_SETINFO */
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_setinfo info;
+	int err;
+
+	down(&vz_quota_sem);
+
+	err = -ESRCH;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (copy_from_user(&info, u_info, sizeof(info)))
+		goto out;
+
+	err = __vz_set_dqinfo(qmblk, info.type, &info.dqi);
+
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+/*
+ * This is the system call backend maintaining UGID quotas.
+ * Note this call is allowed to run ONLY from VE0.
+ */
+long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub)
+{
+	int ret;
+
+	ret = -EPERM;
+	/* access allowed only from root of VE0 */
+	if (!capable(CAP_SYS_RESOURCE) ||
+	    !capable(CAP_SYS_ADMIN))
+		goto out;
+
+	switch (qub->cmd) {	/* qub->addr is a user pointer, cast per subcommand */
+		case VZ_DQ_UGID_GETSTAT:
+			ret = quota_ugid_getstat(qub->quota_id,
+					qub->ugid_index, qub->ugid_size,
+				       	(struct vz_quota_iface *)qub->addr);
+			break;
+		case VZ_DQ_UGID_ADDSTAT:
+			ret = quota_ugid_addstat(qub->quota_id, qub->ugid_size,
+				       	(struct vz_quota_iface *)qub->addr);
+			break;
+		case VZ_DQ_UGID_GETGRACE:
+			ret = quota_ugid_getgrace(qub->quota_id,
+					(struct dq_info *)qub->addr);
+			break;
+		case VZ_DQ_UGID_SETGRACE:
+			ret = quota_ugid_setgrace(qub->quota_id,
+					(struct dq_info *)qub->addr);
+			break;
+		case VZ_DQ_UGID_GETCONFIG:
+			ret = quota_ugid_getconfig(qub->quota_id,
+					(struct vz_quota_ugid_stat *)qub->addr);
+			break;
+		case VZ_DQ_UGID_SETCONFIG:
+			ret = quota_ugid_setconfig(qub->quota_id,
+					(struct vz_quota_ugid_stat *)qub->addr);
+			break;
+		case VZ_DQ_UGID_SETLIMIT:
+			ret = quota_ugid_setlimit(qub->quota_id,
+					(struct vz_quota_ugid_setlimit *)
+								qub->addr);
+			break;
+		case VZ_DQ_UGID_SETINFO:
+			ret = quota_ugid_setinfo(qub->quota_id,
+					(struct vz_quota_ugid_setinfo *)
+								qub->addr);
+			break;
+		default:
+			ret = -EINVAL;
+			goto out;
+	}
+out:
+	return ret;
+}
+
+/*
+ * ugid_quota_on_sb - enable ugid quota handling on a superblock
+ *
+ * Called from the VIRTINFO_QUOTA_ON notifier.  If the superblock exposes
+ * a quota root whose real superblock uses vz_quota_operations, install
+ * the secondary vz dq/quotactl operations and the empty v2 quota format,
+ * then turn on the per-type DQUOT_*_ENABLED flags according to the
+ * qmblk found for this superblock.
+ * NOTE(review): the get_quota_root() result is dereferenced without a
+ * NULL check -- confirm the callback cannot return NULL here.
+ */
+static void ugid_quota_on_sb(struct super_block *sb)
+{
+	struct super_block *real_sb;
+	struct vz_quota_master *qmblk;
+
+	if (!sb->s_op->get_quota_root)
+		return;
+
+	real_sb = sb->s_op->get_quota_root(sb)->i_sb;
+	if (real_sb->dq_op != &vz_quota_operations)
+		return;
+
+	sb->dq_op = &vz_quota_operations2;
+	sb->s_qcop = &vz_quotactl_operations;
+	INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+	INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+	sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
+	sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
+
+	qmblk = vzquota_find_qmblk(sb);
+	if ((qmblk == NULL) || (qmblk == VZ_QUOTA_BAD))
+		return;
+	down(&vz_quota_sem);
+	if (qmblk->dq_flags & VZDQ_USRQUOTA)
+		sb->s_dquot.flags |= DQUOT_USR_ENABLED;
+	if (qmblk->dq_flags & VZDQ_GRPQUOTA)
+		sb->s_dquot.flags |= DQUOT_GRP_ENABLED;
+	up(&vz_quota_sem);
+	qmblk_put(qmblk);
+}
+
+/*
+ * ugid_quota_off_sb - VIRTINFO_QUOTA_OFF handler
+ *
+ * Nothing to undo here; only asserts the superblock is no longer
+ * mounted (s_root must already be NULL).
+ */
+static void ugid_quota_off_sb(struct super_block *sb)
+{
+	/* can't make quota off on mounted super block */
+	BUG_ON(sb->s_root != NULL);
+}
+
+/*
+ * ugid_notifier_call - virtinfo notifier dispatch for quota events
+ *
+ * Handles ON/OFF/GETSTAT events; any other event id passes the previous
+ * chain result through unchanged.
+ */
+static int ugid_notifier_call(struct vnotifier_block *self,
+		unsigned long n, void *data, int old_ret)
+{
+	struct virt_info_quota *viq;
+
+	viq = (struct virt_info_quota *)data;
+
+	switch (n) {
+	case VIRTINFO_QUOTA_ON:
+		ugid_quota_on_sb(viq->super);
+		break;
+	case VIRTINFO_QUOTA_OFF:
+		ugid_quota_off_sb(viq->super);
+		break;
+	case VIRTINFO_QUOTA_GETSTAT:
+		break;
+	default:
+		return old_ret;
+	}
+	return NOTIFY_OK;
+}
+
+/* Registered on VITYPE_QUOTA in vzquota_ugid_init() below. */
+static struct vnotifier_block ugid_notifier_block = {
+	.notifier_call = ugid_notifier_call,
+};
+
+/* ----------------------------------------------------------------------
+ * Init/exit.
+ * --------------------------------------------------------------------- */
+
+/*
+ * Placeholder quota format: advertises QFMT_VFS_V0 with no operations.
+ * Uses the old GCC "label:" initializer syntax (pre-C99 designators).
+ */
+struct quota_format_type vz_quota_empty_v2_format = {
+	qf_fmt_id:	QFMT_VFS_V0,
+	qf_ops:		NULL,
+	qf_owner:	THIS_MODULE
+};
+
+/*
+ * vzquota_ugid_init - module init for the ugid-quota part
+ *
+ * Creates the vz_quota_ugid slab cache, registers the empty v2 quota
+ * format and the VITYPE_QUOTA virtinfo notifier.  Unwinds in reverse
+ * order on failure; returns 0 or a negative errno.
+ */
+int vzquota_ugid_init()
+{
+	int err;
+
+	vz_quota_ugid_cachep = kmem_cache_create("vz_quota_ugid",
+				      sizeof(struct vz_quota_ugid),
+				      0, SLAB_HWCACHE_ALIGN,
+				      NULL, NULL);
+	if (vz_quota_ugid_cachep == NULL)
+		goto err_slab;
+
+	err = register_quota_format(&vz_quota_empty_v2_format);
+	if (err)
+		goto err_reg;
+
+	virtinfo_notifier_register(VITYPE_QUOTA, &ugid_notifier_block);
+	return 0;
+
+err_reg:
+	kmem_cache_destroy(vz_quota_ugid_cachep);
+	return err;
+
+err_slab:
+	printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
+	return -ENOMEM;
+}
+
+/*
+ * vzquota_ugid_release - undo vzquota_ugid_init in reverse order
+ * (notifier, quota format, slab cache).
+ */
+void vzquota_ugid_release()
+{
+	virtinfo_notifier_unregister(VITYPE_QUOTA, &ugid_notifier_block);
+	unregister_quota_format(&vz_quota_empty_v2_format);
+
+	if (kmem_cache_destroy(vz_quota_ugid_cachep))
+		printk(KERN_ERR "VZQUOTA: kmem_cache_destroy failed\n");
+}
diff -Nurap linux-2.6.9-100.orig/fs/vzdquot.c linux-2.6.9-ve023stab054/fs/vzdquot.c
--- linux-2.6.9-100.orig/fs/vzdquot.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/fs/vzdquot.c	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,1755 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains the core of Virtuozzo disk quota implementation:
+ * maintenance of VZDQ information in inodes,
+ * external interfaces,
+ * module entry.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <asm/atomic.h>
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/quota.h>
+#include <linux/rcupdate.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/vzctl.h>
+#include <linux/vzctl_quota.h>
+#include <linux/vzquota.h>
+#include <linux/virtinfo.h>
+#include <linux/vzdq_tree.h>
+
+/* ----------------------------------------------------------------------
+ *
+ * Locking
+ *
+ * ---------------------------------------------------------------------- */
+
+/*
+ * Serializes on/off and all other do_vzquotactl operations.
+ * Protects qmblk hash.
+ */
+struct semaphore vz_quota_sem;
+
+/*
+ * Data access locks
+ *  inode_qmblk
+ *	protects qmblk pointers in all inodes and qlnk content in general
+ *	(but not qmblk content);
+ *	also protects related qmblk invalidation procedures;
+ *	can't be per-inode because of vzquota_dtree_qmblk complications
+ *	and problems with serialization with quota_on,
+ *	but can be per-superblock;
+ *  qmblk_data
+ *	protects qmblk fields (such as current usage)
+ *  quota_data
+ *	protects charge/uncharge operations, thus, implies
+ *	qmblk_data lock and, if CONFIG_VZ_QUOTA_UGID, inode_qmblk lock
+ *	(to protect ugid pointers).
+ *
+ * Lock order:
+ *  inode_qmblk_lock -> dcache_lock
+ *  inode_qmblk_lock -> qmblk_data
+ */
+/*
+ * Single global lock backing all inode_qmblk_{lock,unlock} calls;
+ * the sb argument of the wrappers below is currently unused (one lock
+ * for all superblocks, see the comment above for why).
+ */
+static spinlock_t vzdq_qmblk_lock = SPIN_LOCK_UNLOCKED;
+
+inline void inode_qmblk_lock(struct super_block *sb)
+{
+	spin_lock(&vzdq_qmblk_lock);
+}
+
+inline void inode_qmblk_unlock(struct super_block *sb)
+{
+	spin_unlock(&vzdq_qmblk_lock);
+}
+
+/*
+ * The _read_/_write_ variants below are identical today -- both take
+ * the plain per-qmblk spinlock; the names only document caller intent.
+ */
+inline void qmblk_data_read_lock(struct vz_quota_master *qmblk)
+{
+	spin_lock(&qmblk->dq_data_lock);
+}
+
+inline void qmblk_data_read_unlock(struct vz_quota_master *qmblk)
+{
+	spin_unlock(&qmblk->dq_data_lock);
+}
+
+inline void qmblk_data_write_lock(struct vz_quota_master *qmblk)
+{
+	spin_lock(&qmblk->dq_data_lock);
+}
+
+inline void qmblk_data_write_unlock(struct vz_quota_master *qmblk)
+{
+	spin_unlock(&qmblk->dq_data_lock);
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Master hash table handling.
+ *
+ * SMP not safe, serialized by vz_quota_sem within quota syscalls
+ *
+ * --------------------------------------------------------------------- */
+
+/* Slab cache for struct vz_quota_master allocations. */
+static kmem_cache_t *vzquota_cachep;
+
+/*
+ * Hash function.
+ */
+#define QHASH_BITS		6
+#define	VZ_QUOTA_HASH_SIZE	(1 << QHASH_BITS)
+#define QHASH_MASK		(VZ_QUOTA_HASH_SIZE - 1)
+
+struct list_head vzquota_hash_table[VZ_QUOTA_HASH_SIZE];
+int vzquota_hash_size = VZ_QUOTA_HASH_SIZE;
+
+/* Fold the top QHASH_BITS of the id into the bottom ones: 64 buckets. */
+static inline int vzquota_hash_func(unsigned int qid)
+{
+	return (((qid >> QHASH_BITS) ^ qid) & QHASH_MASK);
+}
+
+/**
+ * vzquota_alloc_master - alloc and instantiate master quota record
+ *
+ * Returns:
+ *	pointer to newly created record if SUCCESS
+ *	-ENOMEM if out of memory
+ *	-EEXIST if record with given quota_id already exist
+ */
+/* Allocates and hashes a new master block; caller holds vz_quota_sem
+ * (required both for the duplicate-id check and the hash insertion). */
+struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
+		struct vz_quota_stat *qstat)
+{
+	int err;
+	struct vz_quota_master *qmblk;
+
+	err = -EEXIST;
+	if (vzquota_find_master(quota_id) != NULL)
+		goto out;
+
+	err = -ENOMEM;
+	qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
+	if (qmblk == NULL)
+		goto out;
+#ifdef CONFIG_VZ_QUOTA_UGID
+	/* err is still -ENOMEM on either tree allocation failure */
+	qmblk->dq_uid_tree = quotatree_alloc();
+	if (!qmblk->dq_uid_tree)
+		goto out_free;
+
+	qmblk->dq_gid_tree = quotatree_alloc();
+	if (!qmblk->dq_gid_tree)
+		goto out_free_tree;
+#endif
+
+	qmblk->dq_state = VZDQ_STARTING;
+	init_MUTEX(&qmblk->dq_sem);
+	spin_lock_init(&qmblk->dq_data_lock);
+
+	/* copy caller-supplied stat/info; no superblock attached yet */
+	qmblk->dq_id = quota_id;
+	qmblk->dq_stat = qstat->dq_stat;
+	qmblk->dq_info = qstat->dq_info;
+	qmblk->dq_root_dentry = NULL;
+	qmblk->dq_root_mnt = NULL;
+	qmblk->dq_sb = NULL;
+	qmblk->dq_ugid_count = 0;
+	qmblk->dq_ugid_max = 0;
+	qmblk->dq_flags = 0;
+	memset(qmblk->dq_ugid_info, 0, sizeof(qmblk->dq_ugid_info));
+	INIT_LIST_HEAD(&qmblk->dq_ilink_list);
+
+	atomic_set(&qmblk->dq_count, 1);
+
+	/* insert in hash chain */
+	list_add(&qmblk->dq_hash,
+		&vzquota_hash_table[vzquota_hash_func(quota_id)]);
+
+	/* success */
+	return qmblk;
+
+out_free_tree:
+	quotatree_free(qmblk->dq_uid_tree, NULL);
+out_free:
+	kmem_cache_free(vzquota_cachep, qmblk);
+out:
+	return ERR_PTR(err);
+}
+
+/*
+ * vzquota_alloc_fake - allocate a zeroed "no quota" placeholder qmblk
+ *
+ * The result is marked VZDQ_STOPING/VZDQ_NOQUOT, is not hashed, and is
+ * used as the __VZ_QUOTA_NOQUOTA() master for a superblock (see
+ * vzquota_get_super below).  Returns NULL on allocation failure.
+ */
+static struct vz_quota_master *vzquota_alloc_fake(void)
+{
+	struct vz_quota_master *qmblk;
+
+	qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
+	if (qmblk == NULL)
+		return NULL;
+	memset(qmblk, 0, sizeof(*qmblk));
+	qmblk->dq_state = VZDQ_STOPING;
+	qmblk->dq_flags = VZDQ_NOQUOT;
+	spin_lock_init(&qmblk->dq_data_lock);
+	INIT_LIST_HEAD(&qmblk->dq_ilink_list);
+	atomic_set(&qmblk->dq_count, 1);
+	return qmblk;
+}
+
+/**
+ * vzquota_find_master - find master record with given id
+ *
+ * Returns qmblk without touching its refcounter.
+ * Called under vz_quota_sem.
+ */
+/* Linear scan of the id's hash bucket; no reference is taken. */
+struct vz_quota_master *vzquota_find_master(unsigned int quota_id)
+{
+	int i;
+	struct vz_quota_master *qp;
+
+	i = vzquota_hash_func(quota_id);
+	list_for_each_entry(qp, &vzquota_hash_table[i], dq_hash) {
+		if (qp->dq_id == quota_id)
+			return qp;
+	}
+	return NULL;
+}
+
+/**
+ * vzquota_free_master - release resources taken by qmblk, freeing memory
+ *
+ * qmblk is assumed to be already taken out from the hash.
+ * Should be called outside vz_quota_sem.
+ */
+/* Frees a qmblk already removed from the hash; releases the ugid trees
+ * first (may sleep -- hence "outside vz_quota_sem" in the comment above),
+ * and requires that no inodes still link to this master. */
+void vzquota_free_master(struct vz_quota_master *qmblk)
+{
+#ifdef CONFIG_VZ_QUOTA_UGID
+	vzquota_kill_ugid(qmblk);
+#endif
+	BUG_ON(!list_empty(&qmblk->dq_ilink_list));
+	kmem_cache_free(vzquota_cachep, qmblk);
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Passing quota information through current
+ *
+ * Used in inode -> qmblk lookup at inode creation stage (since at that
+ * time there are no links between the inode being created and its parent
+ * directory).
+ *
+ * --------------------------------------------------------------------- */
+
+#define VZDQ_CUR_MAGIC	0x57d0fee2
+
+/* The helpers below stash the parent directory inode in the current
+ * task, tagged with VZDQ_CUR_MAGIC so a stale value is not mistaken
+ * for a valid one.  NOTE(review): ->magic and ->ino are VZ-specific
+ * task_struct fields added elsewhere in this patch -- not mainline. */
+static inline int vzquota_cur_qmblk_check(void)
+{
+	return current->magic == VZDQ_CUR_MAGIC;
+}
+
+static inline struct inode *vzquota_cur_qmblk_fetch(void)
+{
+	return current->ino;
+}
+
+static inline void vzquota_cur_qmblk_set(struct inode *data)
+{
+	struct task_struct *tsk;
+
+	tsk = current;
+	tsk->magic = VZDQ_CUR_MAGIC;
+	tsk->ino = data;
+}
+
+/* Intentionally compiled out: the magic is simply left stale instead. */
+#if 0
+static inline void vzquota_cur_qmblk_reset(void)
+{
+	current->magic = 0;
+}
+#endif
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Superblock quota operations
+ *
+ * --------------------------------------------------------------------- */
+
+/*
+ * Kernel structure abuse.
+ * We use files[0] pointer as an int variable:
+ * reference counter of how many quota blocks uses this superblock.
+ * files[1] is used for generations structure which helps us to track
+ * when traversing of dentries is really required.
+ */
+#define __VZ_QUOTA_NOQUOTA(sb)		(*(struct vz_quota_master **)\
+						&sb->s_dquot.files[1])
+#define __VZ_QUOTA_TSTAMP(sb)		((struct timeval *)\
+						&sb->s_dquot.dqio_sem)
+
+#if defined(VZ_QUOTA_UNLOAD)
+
+#define __VZ_QUOTA_SBREF(sb)		(*(int *)&sb->s_dquot.files[0])
+
+struct dquot_operations *orig_dq_op;
+struct quotactl_ops *orig_dq_cop;
+
+/**
+ * vzquota_get_super - account for a new quota'd tree under the superblock
+ *
+ * One superblock can have multiple directory subtrees with different VZ
+ * quotas.  We keep a counter of such subtrees and set VZ quota operations or
+ * reset the default ones.
+ *
+ * Called under vz_quota_sem (from quota_on).
+ */
+int vzquota_get_super(struct super_block *sb)
+{
+	/* first quota tree on this sb: take over dq_op/s_qcop */
+	if (sb->dq_op != &vz_quota_operations) {
+		down(&sb->s_dquot.dqonoff_sem);
+		/* refuse if ordinary quota is already enabled here */
+		if (sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) {
+			up(&sb->s_dquot.dqonoff_sem);
+			return -EEXIST;
+		}
+		/* remember the original ops once, to restore in put_super */
+		if (orig_dq_op == NULL && sb->dq_op != NULL)
+			orig_dq_op = sb->dq_op;
+		sb->dq_op = &vz_quota_operations;
+		if (orig_dq_cop == NULL && sb->s_qcop != NULL)
+			orig_dq_cop = sb->s_qcop;
+		/* XXX this may race with sys_quotactl */
+#ifdef CONFIG_VZ_QUOTA_UGID
+		sb->s_qcop = &vz_quotactl_operations;
+#else
+		sb->s_qcop = NULL;
+#endif
+		/* dqio_sem storage is reused as a quota-on timestamp */
+		do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
+
+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+		sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
+		sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
+		/*
+		 * To get quotaops.h call us we need to mark superblock
+		 * as having quota.  These flags mark the moment when
+		 * our dq_op start to be called.
+		 *
+		 * The ordering of dq_op and s_dquot.flags assignment
+		 * needs to be enforced, but other CPUs do not do rmb()
+		 * between s_dquot.flags and dq_op accesses.
+		 */
+		wmb(); synchronize_kernel();
+		sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
+		__module_get(THIS_MODULE);
+		up(&sb->s_dquot.dqonoff_sem);
+	}
+	/* protected by vz_quota_sem */
+	__VZ_QUOTA_SBREF(sb)++;
+	return 0;
+}
+
+/**
+ * vzquota_put_super - release superblock when one quota tree goes away
+ *
+ * Called under vz_quota_sem.
+ */
+void vzquota_put_super(struct super_block *sb)
+{
+	int count;
+
+	/* sbref counter is protected by vz_quota_sem (see get_super) */
+	count = --__VZ_QUOTA_SBREF(sb);
+	if (count == 0) {
+		down(&sb->s_dquot.dqonoff_sem);
+		/* stop new dq_op calls before restoring original ops */
+		sb->s_dquot.flags = 0;
+		wmb(); synchronize_kernel();
+		sema_init(&sb->s_dquot.dqio_sem, 1);
+		sb->s_qcop = orig_dq_cop;
+		sb->dq_op = orig_dq_op;
+		inode_qmblk_lock(sb);
+		quota_gen_put(SB_QGEN(sb));
+		SB_QGEN(sb) = NULL;
+		/* release qlnk's without qmblk */
+		remove_inode_quota_links_list(&non_vzquota_inodes_lh,
+				sb, NULL);
+		/*
+		 * Races with quota initialization:
+		 * after this inode_qmblk_unlock all inode's generations are
+		 * invalidated, quota_inode_qmblk checks superblock operations.
+		 */
+		inode_qmblk_unlock(sb);
+		/*
+		 * Module refcounting: in theory, this is the best place
+		 * to call module_put(THIS_MODULE).
+		 * In reality, it can't be done because we can't be sure that
+		 * other CPUs do not enter our code segment through dq_op
+		 * cached long time ago.  Quotaops interface isn't supposed to
+		 * go into modules currently (that is, into unloadable
+		 * modules).  By omitting module_put, our module isn't
+		 * unloadable.
+		 */
+		up(&sb->s_dquot.dqonoff_sem);
+	}
+}
+
+#else
+
+/*
+ * Wrapper holding a private copy of a superblock's s_op with put_super
+ * redirected to vzquota_shutdown_super, plus the original s_op pointer
+ * so it can be restored at umount.
+ */
+struct vzquota_new_sop {
+	struct super_operations new_op;
+	struct super_operations *old_op;
+};
+
+/**
+ * vzquota_shutdown_super - callback on umount
+ */
+void vzquota_shutdown_super(struct super_block *sb)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_stat *sop_unused; /* see note below */
+	struct vzquota_new_sop *sop;
+
+	/* drop the fake "no quota" master installed by vzquota_get_super */
+	qmblk = __VZ_QUOTA_NOQUOTA(sb);
+	__VZ_QUOTA_NOQUOTA(sb) = NULL;
+	if (qmblk != NULL)
+		qmblk_put(qmblk);
+	/* restore the original s_op, then chain to its put_super */
+	sop = container_of(sb->s_op, struct vzquota_new_sop, new_op);
+	sb->s_op = sop->old_op;
+	kfree(sop);
+	if (sb->s_op->put_super != NULL)
+		(*sb->s_op->put_super)(sb);
+}
+
+/**
+ * vzquota_get_super - account for a new quota'd tree under the superblock
+ *
+ * One superblock can have multiple directory subtrees with different VZ
+ * quotas.
+ *
+ * Called under vz_quota_sem (from vzquota_on).
+ */
+int vzquota_get_super(struct super_block *sb)
+{
+	struct vz_quota_master *qnew;
+	struct vzquota_new_sop *sop;
+	int err;
+
+	down(&sb->s_dquot.dqonoff_sem);
+	err = -EEXIST;
+	/* refuse if ordinary (non-vz) quota is already enabled here */
+	if ((sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) &&
+	    sb->dq_op != &vz_quota_operations)
+		goto out_up;
+
+	/*
+	 * This allocation code should be under sb->dq_op check below, but
+	 * it doesn't really matter...
+	 */
+	if (__VZ_QUOTA_NOQUOTA(sb) == NULL) {
+		qnew = vzquota_alloc_fake();
+		if (qnew == NULL)
+			goto out_up;	/* err is still -EEXIST here */
+		__VZ_QUOTA_NOQUOTA(sb) = qnew;
+	}
+
+	if (sb->dq_op != &vz_quota_operations) {
+		/* first time: interpose on s_op to catch umount */
+		sop = kmalloc(sizeof(*sop), GFP_KERNEL);
+		if (sop == NULL) {
+			vzquota_free_master(__VZ_QUOTA_NOQUOTA(sb));
+			__VZ_QUOTA_NOQUOTA(sb) = NULL;
+			goto out_up;
+		}
+		memcpy(&sop->new_op, sb->s_op, sizeof(sop->new_op));
+		sop->new_op.put_super = &vzquota_shutdown_super;
+		sop->old_op = sb->s_op;
+		sb->s_op = &sop->new_op;
+
+		sb->dq_op = &vz_quota_operations;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		sb->s_qcop = &vz_quotactl_operations;
+#else
+		sb->s_qcop = NULL;
+#endif
+		/* dqio_sem storage is reused as a quota-on timestamp */
+		do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
+
+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
+		/* these 2 list heads are checked in sync_dquots() */
+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+		sb->s_dquot.info[USRQUOTA].dqi_format =
+						&vz_quota_empty_v2_format;
+		sb->s_dquot.info[GRPQUOTA].dqi_format =
+						&vz_quota_empty_v2_format;
+
+		/*
+		 * To get quotaops.h to call us we need to mark superblock
+		 * as having quota.  These flags mark the moment when
+		 * our dq_op start to be called.
+		 *
+		 * The ordering of dq_op and s_dquot.flags assignment
+		 * needs to be enforced, but other CPUs do not do rmb()
+		 * between s_dquot.flags and dq_op accesses.
+		 */
+		wmb(); synchronize_kernel();
+		sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
+	}
+	err = 0;
+
+out_up:
+	up(&sb->s_dquot.dqonoff_sem);
+	return err;
+}
+
+/**
+ * vzquota_put_super - one quota tree less on this superblock
+ *
+ * Called under vz_quota_sem.  Deliberately a no-op in this
+ * (non-VZ_QUOTA_UNLOAD) configuration -- see the comment inside.
+ */
+void vzquota_put_super(struct super_block *sb)
+{
+	/*
+	 * Even if this put is the last one,
+	 * sb->s_dquot.flags can't be cleared, because otherwise vzquota_drop
+	 * won't be called and the remaining qmblk references won't be put.
+	 */
+}
+
+#endif
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Helpers for inode -> qmblk link maintenance
+ *
+ * --------------------------------------------------------------------- */
+
+/* Sentinel for "qlnk not yet filled"; distinct from VZ_QUOTA_BAD. */
+#define __VZ_QUOTA_EMPTY		((void *)0xbdbdbdbd)
+/* Note: the sb argument is unused -- only the qmblk flag is tested. */
+#define VZ_QUOTA_IS_NOQUOTA(qm, sb)	((qm)->dq_flags & VZDQ_NOQUOT)
+#define VZ_QUOTA_EMPTY_IOPS		(&vfs_empty_iops)
+extern struct inode_operations vfs_empty_iops;
+
+/*
+ * An inode's qlnk is "actual" if it points to VZ_QUOTA_BAD or to a live
+ * qmblk; empty or invalidated (VZDQ_NOACT) links are not actual.
+ */
+static int VZ_QUOTA_IS_ACTUAL(struct inode *inode)
+{
+	struct vz_quota_master *qmblk;
+
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk == VZ_QUOTA_BAD)
+		return 1;
+	if (qmblk == __VZ_QUOTA_EMPTY)
+		return 0;
+	if (qmblk->dq_flags & VZDQ_NOACT)
+		/* not actual (invalidated) qmblk */
+		return 0;
+	return 1;
+}
+
+static inline int vzquota_qlnk_is_empty(struct vz_quota_ilink *qlnk)
+{
+	return qlnk->qmblk == __VZ_QUOTA_EMPTY;
+}
+
+/* Keeps the last two origin tags for debugging (see the NULL-qmblk
+ * report in vzquota_inode_qmblk_set). */
+static inline void set_qlnk_origin(struct vz_quota_ilink *qlnk,
+		unsigned char origin)
+{
+	qlnk->origin[0] = qlnk->origin[1];
+	qlnk->origin[1] = origin;
+}
+
+static inline void vzquota_qlnk_set_empty(struct vz_quota_ilink *qlnk)
+{
+	qlnk->qmblk = __VZ_QUOTA_EMPTY;
+	set_qlnk_origin(qlnk, VZ_QUOTAO_SETE);
+}
+
+/* Zero the qlnk, detach its list head, and mark it empty. */
+void vzquota_qlnk_init(struct vz_quota_ilink *qlnk)
+{
+	memset(qlnk, 0, sizeof(*qlnk));
+	INIT_LIST_HEAD(&qlnk->list);
+	vzquota_qlnk_set_empty(qlnk);
+	set_qlnk_origin(qlnk, VZ_QUOTAO_INIT);
+}
+
+/*
+ * vzquota_qlnk_destroy - release the references held by a qlnk
+ *
+ * Drops the uid/gid ugid references (under the master's dq_sem -- hence
+ * might_sleep) and then the qmblk reference itself.  Empty or BAD links
+ * hold no references and are skipped.  The qlnk is left with only its
+ * origin tag updated; callers re-init it if they reuse it.
+ */
+void vzquota_qlnk_destroy(struct vz_quota_ilink *qlnk)
+{
+	might_sleep();
+	if (vzquota_qlnk_is_empty(qlnk))
+		return;
+#if defined(CONFIG_VZ_QUOTA_UGID)
+	if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD) {
+		struct vz_quota_master *qmblk;
+		struct vz_quota_ugid *quid, *qgid;
+		qmblk = qlnk->qmblk;
+		quid = qlnk->qugid[USRQUOTA];
+		qgid = qlnk->qugid[GRPQUOTA];
+		if (quid != NULL || qgid != NULL) {
+			down(&qmblk->dq_sem);
+			if (qgid != NULL)
+				vzquota_put_ugid(qmblk, qgid);
+			if (quid != NULL)
+				vzquota_put_ugid(qmblk, quid);
+			up(&qmblk->dq_sem);
+		}
+	}
+#endif
+	if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qlnk->qmblk);
+	set_qlnk_origin(qlnk, VZ_QUOTAO_DESTR);
+}
+
+/**
+ * vzquota_qlnk_swap - swap inode's and temporary vz_quota_ilink contents
+ * @qlt: temporary
+ * @qli: inode's
+ *
+ * Locking is provided by the caller (depending on the context).
+ * After swap, @qli is inserted into the corresponding dq_ilink_list,
+ * @qlt list is reinitialized.
+ */
+static void vzquota_qlnk_swap(struct vz_quota_ilink *qlt,
+		struct vz_quota_ilink *qli)
+{
+	struct vz_quota_master *qb;
+	struct vz_quota_ugid *qu;
+	int i;
+
+	/* exchange qmblk pointers and relink qli under its new master */
+	qb = qlt->qmblk;
+	qlt->qmblk = qli->qmblk;
+	qli->qmblk = qb;
+	list_del_init(&qli->list);
+	if (qb != __VZ_QUOTA_EMPTY && qb != VZ_QUOTA_BAD)
+		list_add(&qli->list, &qb->dq_ilink_list);
+	INIT_LIST_HEAD(&qlt->list);
+	set_qlnk_origin(qli, VZ_QUOTAO_SWAP);
+
+	/* ugid pointers are swapped unconditionally, valid master or not */
+	for (i = 0; i < MAXQUOTAS; i++) {
+		qu = qlt->qugid[i];
+		qlt->qugid[i] = qli->qugid[i];
+		qli->qugid[i] = qu;
+	}
+}
+
+/**
+ * vzquota_qlnk_reinit_locked - destroy qlnk content, called under locks
+ *
+ * Called under dcache_lock and inode_qmblk locks.
+ * Returns 1 if locks were dropped inside, 0 if atomic.
+ */
+static int vzquota_qlnk_reinit_locked(struct vz_quota_ilink *qlnk,
+		struct inode *inode)
+{
+	/* already empty: nothing to release */
+	if (vzquota_qlnk_is_empty(qlnk))
+		return 0;
+	/* BAD holds no references, can be emptied without sleeping */
+	if (qlnk->qmblk == VZ_QUOTA_BAD) {
+		vzquota_qlnk_set_empty(qlnk);
+		set_qlnk_origin(qlnk, VZ_QUOTAO_RE_LOCK);
+		return 0;
+	}
+	/* must drop both locks: qlnk_destroy may sleep on dq_sem */
+	spin_unlock(&dcache_lock);
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(qlnk);
+	vzquota_qlnk_init(qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	spin_lock(&dcache_lock);
+	return 1;
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+/**
+ * vzquota_qlnk_reinit_attr - destroy and reinit qlnk content
+ *
+ * Similar to vzquota_qlnk_reinit_locked, called under different locks.
+ */
+static int vzquota_qlnk_reinit_attr(struct vz_quota_ilink *qlnk,
+		struct inode *inode,
+		struct vz_quota_master *qmblk)
+{
+	if (vzquota_qlnk_is_empty(qlnk))
+		return 0;
+	/* may be optimized if qlnk->qugid all NULLs */
+	/* drop qmblk-data + inode_qmblk locks: qlnk_destroy may sleep */
+	qmblk_data_write_unlock(qmblk);
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(qlnk);
+	vzquota_qlnk_init(qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	qmblk_data_write_lock(qmblk);
+	return 1;
+}
+#endif
+
+/**
+ * vzquota_qlnk_fill - fill vz_quota_ilink content
+ * @qlnk: vz_quota_ilink to fill
+ * @inode: inode for which @qlnk is filled (i_sb, i_uid, i_gid)
+ * @qmblk: qmblk to which this @qlnk will belong
+ *
+ * Called under dcache_lock and inode_qmblk locks.
+ * Returns 1 if locks were dropped inside, 0 if atomic.
+ * @qlnk is expected to be empty.
+ */
+static int vzquota_qlnk_fill(struct vz_quota_ilink *qlnk,
+		struct inode *inode,
+		struct vz_quota_master *qmblk)
+{
+	/* BAD is a sentinel, not a refcounted object */
+	if (qmblk != VZ_QUOTA_BAD)
+		qmblk_get(qmblk);
+	qlnk->qmblk = qmblk;
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+	/* for live masters with ugid accounting on, also look up the
+	 * per-uid/per-gid records; this sleeps, so both locks go */
+	if (qmblk != VZ_QUOTA_BAD &&
+	    !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
+	    (qmblk->dq_flags & VZDQUG_ON)) {
+		struct vz_quota_ugid *quid, *qgid;
+
+		spin_unlock(&dcache_lock);
+		inode_qmblk_unlock(inode->i_sb);
+
+		down(&qmblk->dq_sem);
+		quid = __vzquota_find_ugid(qmblk, inode->i_uid, USRQUOTA, 0);
+		qgid = __vzquota_find_ugid(qmblk, inode->i_gid, GRPQUOTA, 0);
+		up(&qmblk->dq_sem);
+
+		inode_qmblk_lock(inode->i_sb);
+		spin_lock(&dcache_lock);
+		qlnk->qugid[USRQUOTA] = quid;
+		qlnk->qugid[GRPQUOTA] = qgid;
+		return 1;
+	}
+#endif
+
+	return 0;
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+/**
+ * vzquota_qlnk_fill_attr - fill vz_quota_ilink content for uid, gid
+ *
+ * This function is a helper for vzquota_transfer, and differs from
+ * vzquota_qlnk_fill only by locking.
+ */
+static int vzquota_qlnk_fill_attr(struct vz_quota_ilink *qlnk,
+		struct inode *inode,
+		struct iattr *iattr,
+		int mask,
+		struct vz_quota_master *qmblk)
+{
+	qmblk_get(qmblk);
+	qlnk->qmblk = qmblk;
+
+	/* mask bits say which of uid/gid is being changed by iattr;
+	 * the unchanged one keeps (and re-references) the inode's
+	 * current ugid record */
+	if (mask) {
+		struct vz_quota_ugid *quid, *qgid;
+
+		quid = qgid = NULL; /* to make gcc happy */
+		if (!(mask & (1 << USRQUOTA)))
+			quid = vzquota_get_ugid(INODE_QLNK(inode)->
+							qugid[USRQUOTA]);
+		if (!(mask & (1 << GRPQUOTA)))
+			qgid = vzquota_get_ugid(INODE_QLNK(inode)->
+							qugid[GRPQUOTA]);
+
+		/* lookups below sleep on dq_sem: release spinning locks */
+		qmblk_data_write_unlock(qmblk);
+		inode_qmblk_unlock(inode->i_sb);
+
+		down(&qmblk->dq_sem);
+		if (mask & (1 << USRQUOTA))
+			quid = __vzquota_find_ugid(qmblk, iattr->ia_uid,
+					USRQUOTA, 0);
+		if (mask & (1 << GRPQUOTA))
+			qgid = __vzquota_find_ugid(qmblk, iattr->ia_gid,
+					GRPQUOTA, 0);
+		up(&qmblk->dq_sem);
+
+		inode_qmblk_lock(inode->i_sb);
+		qmblk_data_write_lock(qmblk);
+		qlnk->qugid[USRQUOTA] = quid;
+		qlnk->qugid[GRPQUOTA] = qgid;
+		return 1;
+	}
+
+	return 0;
+}
+#endif
+
+/**
+ * __vzquota_inode_init - make sure inode's qlnk is initialized
+ *
+ * May be called if qlnk is already initialized, detects this situation itself.
+ * Called under inode_qmblk_lock.
+ */
+static void __vzquota_inode_init(struct inode *inode, unsigned char origin)
+{
+	/* i_dquot[USRQUOTA] doubles as the "qlnk initialized" marker:
+	 * NODQUOT means uninitialized, ~NODQUOT means qlnk is live */
+	if (inode->i_dquot[USRQUOTA] == NODQUOT) {
+		vzquota_qlnk_init(INODE_QLNK(inode));
+		inode->i_dquot[USRQUOTA] = (void *)~(unsigned long)NODQUOT;
+	}
+	set_qlnk_origin(INODE_QLNK(inode), origin);
+}
+
+/**
+ * vzquota_inode_drop - destroy VZ quota information in the inode
+ *
+ * Inode must not be externally accessible or dirty.
+ */
+static void vzquota_inode_drop(struct inode *inode)
+{
+	struct vz_quota_ilink qlnk;
+
+	/* swap an empty qlnk in under the lock, destroy the old one
+	 * (which may sleep) after the lock is dropped */
+	vzquota_qlnk_init(&qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	vzquota_qlnk_swap(&qlnk, INODE_QLNK(inode));
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_DRCAL);
+	/* reset the "initialized" marker, see __vzquota_inode_init */
+	inode->i_dquot[USRQUOTA] = NODQUOT;
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&qlnk);
+}
+
+/**
+ * vzquota_inode_qmblk_set - initialize inode's qlnk
+ * @inode: inode to be initialized
+ * @qmblk: quota master block to which this inode should belong (may be BAD)
+ * @qlnk: placeholder to store data to resolve locking issues
+ *
+ * Returns 1 if locks were dropped and rechecks possibly needed, 0 otherwise.
+ * Called under dcache_lock and inode_qmblk locks.
+ * @qlnk will be destroyed in the caller chain.
+ *
+ * It is not mandatory to restart parent checks since quota on/off currently
+ * shrinks dentry tree and checks that there are not outside references.
+ * But if at some time that shrink is removed, restarts will be required.
+ * Additionally, the restarts prevent inconsistencies if the dentry tree
+ * changes (inode is moved).  This is not a big deal, but anyway...
+ */
+static int vzquota_inode_qmblk_set(struct inode *inode,
+		struct vz_quota_master *qmblk,
+		struct vz_quota_ilink *qlnk)
+{
+	/* NULL should never be passed; report (with the qlnk origin
+	 * history for debugging) and degrade to VZ_QUOTA_BAD */
+	if (qmblk == NULL) {
+		printk(KERN_ERR "VZDQ: NULL in set, orig {%u, %u}, "
+				"dev %s, inode %lu, fs %s\n",
+				INODE_QLNK(inode)->origin[0],
+				INODE_QLNK(inode)->origin[1],
+				inode->i_sb->s_id, inode->i_ino,
+				inode->i_sb->s_type->name);
+		printk(KERN_ERR "current %d (%s), VE %d\n",
+				current->pid, current->comm,
+				VEID(get_exec_env()));
+		dump_stack();
+		qmblk = VZ_QUOTA_BAD;
+	}
+	/* converge @qlnk onto @qmblk; each helper returning 1 means the
+	 * locks were dropped and the caller must recheck its state */
+	while (1) {
+		if (vzquota_qlnk_is_empty(qlnk) &&
+		    vzquota_qlnk_fill(qlnk, inode, qmblk))
+			return 1;
+		if (qlnk->qmblk == qmblk)
+			break;
+		if (vzquota_qlnk_reinit_locked(qlnk, inode))
+			return 1;
+	}
+	/* atomic hand-over: inode gets the filled qlnk, caller gets the
+	 * old contents to destroy outside the locks */
+	vzquota_qlnk_swap(qlnk, INODE_QLNK(inode));
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_QSET);
+	return 0;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * vzquota_inode_qmblk (inode -> qmblk lookup) parts
+ *
+ * --------------------------------------------------------------------- */
+
+/* Returns 0 if the inode has at least one dentry alias, -1 (with a
+ * report) if it is detached from the dentry tree. */
+static int vzquota_dparents_check_attach(struct inode *inode)
+{
+	if (!list_empty(&inode->i_dentry))
+		return 0;
+	printk(KERN_ERR "VZDQ: no parent for "
+			"dev %s, inode %lu, fs %s\n",
+			inode->i_sb->s_id,
+			inode->i_ino,
+			inode->i_sb->s_type->name);
+	return -1;
+}
+
+/* Returns the first parent inode whose qlnk is not actual (initializing
+ * it on first touch), or NULL if all parents are actual. */
+static struct inode *vzquota_dparents_check_actual(struct inode *inode)
+{
+	struct dentry *de;
+
+	list_for_each_entry(de, &inode->i_dentry, d_alias) {
+		if (de->d_parent == de) /* detached dentry, perhaps */
+			continue;
+		/* first access to parent, make sure its qlnk initialized */
+		__vzquota_inode_init(de->d_parent->d_inode, VZ_QUOTAO_ACT);
+		if (!VZ_QUOTA_IS_ACTUAL(de->d_parent->d_inode))
+			return de->d_parent->d_inode;
+	}
+	return NULL;
+}
+
+/* Returns the common qmblk of all parents, or VZ_QUOTA_BAD if parents
+ * disagree or the inode has no attached parent at all. */
+static struct vz_quota_master *vzquota_dparents_check_same(struct inode *inode)
+{
+	struct dentry *de;
+	struct vz_quota_master *qmblk;
+
+	qmblk = NULL;
+	list_for_each_entry(de, &inode->i_dentry, d_alias) {
+		if (de->d_parent == de) /* detached dentry, perhaps */
+			continue;
+		if (qmblk == NULL) {
+			qmblk = INODE_QLNK(de->d_parent->d_inode)->qmblk;
+			continue;
+		}
+		if (INODE_QLNK(de->d_parent->d_inode)->qmblk != qmblk) {
+			printk(KERN_WARNING "VZDQ: multiple quotas for "
+					"dev %s, inode %lu, fs %s\n",
+					inode->i_sb->s_id,
+					inode->i_ino,
+					inode->i_sb->s_type->name);
+			qmblk = VZ_QUOTA_BAD;
+			break;
+		}
+	}
+	if (qmblk == NULL) {
+		printk(KERN_WARNING "VZDQ: not attached to tree, "
+				"dev %s, inode %lu, fs %s\n",
+				inode->i_sb->s_id,
+				inode->i_ino,
+				inode->i_sb->s_type->name);
+		qmblk = VZ_QUOTA_BAD;
+	}
+	return qmblk;
+}
+
+/*
+ * vzquota_dbranch_actualize - walk up from @inode and actualize the
+ * topmost non-actual ancestor's qmblk.
+ *
+ * Called (and returns) under inode_qmblk lock + dcache_lock of
+ * @refinode's sb; both are dropped and retaken around the final
+ * qlnk_destroy/iput.  An i_count reference pins the inode being
+ * actualized across those lock drops.
+ */
+static void vzquota_dbranch_actualize(struct inode *inode,
+		struct inode *refinode)
+{
+	struct inode *pinode;
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ilink qlnk;
+
+	vzquota_qlnk_init(&qlnk);
+
+start:
+	if (inode == inode->i_sb->s_root->d_inode) {
+		/* filesystem root */
+		atomic_inc(&inode->i_count);
+		do {
+			qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
+		} while (vzquota_inode_qmblk_set(inode, qmblk, &qlnk));
+		goto out;
+	}
+
+	/* climb to the highest non-actual ancestor first */
+	if (!vzquota_dparents_check_attach(inode)) {
+		pinode = vzquota_dparents_check_actual(inode);
+		if (pinode != NULL) {
+			inode = pinode;
+			goto start;
+		}
+	}
+
+	atomic_inc(&inode->i_count);
+	while (1) {
+		if (VZ_QUOTA_IS_ACTUAL(inode)) /* actualized without us */
+			break;
+		/*
+		 * Need to check parents again if we have slept inside
+		 * vzquota_inode_qmblk_set() in the loop.
+		 * If the state of parents is different, just return and repeat
+		 * the actualizing process again from the inode passed to
+		 * vzquota_inode_qmblk_recalc().
+		 */
+		if (!vzquota_dparents_check_attach(inode)) {
+			if (vzquota_dparents_check_actual(inode) != NULL)
+				break;
+			qmblk = vzquota_dparents_check_same(inode);
+		} else
+			qmblk = VZ_QUOTA_BAD;
+		if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk)){/* success */
+			set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_ACT);
+			break;
+		}
+	}
+
+out:
+	/* qlnk_destroy and iput may sleep: drop locks, then restore them
+	 * for the caller */
+	spin_unlock(&dcache_lock);
+	inode_qmblk_unlock(refinode->i_sb);
+	vzquota_qlnk_destroy(&qlnk);
+	iput(inode);
+	inode_qmblk_lock(refinode->i_sb);
+	spin_lock(&dcache_lock);
+}
+
+/*
+ * vzquota_dtree_qmblk_recalc - (re)compute the qmblk of an inode that is
+ * attached to the dentry tree, actualizing stale ancestors as needed.
+ *
+ * The filesystem root always gets the sb's "no quota" master; any other
+ * inode inherits the common qmblk of its parents (or VZ_QUOTA_BAD on
+ * inconsistency/detachment).  Restarts whenever a helper slept.
+ */
+static void vzquota_dtree_qmblk_recalc(struct inode *inode,
+		struct vz_quota_ilink *qlnk)
+{
+	struct inode *pinode;
+	struct vz_quota_master *qmblk;
+
+	if (inode == inode->i_sb->s_root->d_inode) {
+		/* filesystem root */
+		do {
+			qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
+		} while (vzquota_inode_qmblk_set(inode, qmblk, qlnk));
+		return;
+	}
+
+start:
+	if (VZ_QUOTA_IS_ACTUAL(inode))
+		return;
+	/*
+	 * Here qmblk is (re-)initialized for all ancestors.
+	 * This is not a very efficient procedure, but it guarantees that
+	 * the quota tree is consistent (that is, the inode doesn't have two
+	 * ancestors with different qmblk).
+	 */
+	if (!vzquota_dparents_check_attach(inode)) {
+		pinode = vzquota_dparents_check_actual(inode);
+		if (pinode != NULL) {
+			vzquota_dbranch_actualize(pinode, inode);
+			goto start;
+		}
+		qmblk = vzquota_dparents_check_same(inode);
+	} else
+		qmblk = VZ_QUOTA_BAD;
+
+	if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
+		goto start;
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_DTREE);
+}
+
+static void vzquota_det_qmblk_recalc(struct inode *inode,
+		struct vz_quota_ilink *qlnk)
+{
+	struct inode *parent;
+	struct vz_quota_master *qmblk;
+	char *msg;
+	int cnt;
+	time_t timeout;
+
+	cnt = 0;
+	parent = NULL;
+start:
+	/*
+	 * qmblk of detached inodes shouldn't be considered as not actual.
+	 * They are not in any dentry tree, so quota on/off shouldn't affect
+	 * them.
+	 */
+	if (!vzquota_qlnk_is_empty(INODE_QLNK(inode)))
+		return;
+
+	timeout = 3;
+	qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
+	/*
+	 * Scenario:
+	 *	open
+	 *	unlink
+	 * 	quotaon
+	 *	generic_delete_inode
+	 *
+	 * This is the first time vzquota sees inode. inode is outside of
+	 * vzquota area of interest, otherwise quotaon would have got -EBUSY
+	 * due to shrink_dcache_parent().
+	 * inode is almost completely destroyed, so don't intervene.
+	 * 
+	 * dev@:
+	 * However, there is a small race here...
+	 * dput() first removes itself from all the lists,
+	 * so shrink_dcache_parent() can succeed while dentry_iput is not
+	 * done yet.
+	 */
+	if (inode->i_state & I_FREEING)
+		goto set;
+
+	msg = "detached inode not in creation";
+	if (inode->i_op != VZ_QUOTA_EMPTY_IOPS)
+		goto fail;
+	qmblk = VZ_QUOTA_BAD;
+	msg = "unexpected creation context";
+	if (!vzquota_cur_qmblk_check())
+		goto fail;
+	timeout = 0;
+	parent = vzquota_cur_qmblk_fetch();
+	msg = "uninitialized parent";
+	if (vzquota_qlnk_is_empty(INODE_QLNK(parent)))
+		goto fail;
+	msg = "parent not in tree";
+	if (list_empty(&parent->i_dentry))
+		goto fail;
+	msg = "parent has 0 refcount";
+	if (!atomic_read(&parent->i_count))
+		goto fail;
+	msg = "parent has different sb";
+	if (parent->i_sb != inode->i_sb)
+		goto fail;
+	if (!VZ_QUOTA_IS_ACTUAL(parent)) {
+		vzquota_dbranch_actualize(parent, inode);
+		goto start;
+	}
+
+	qmblk = INODE_QLNK(parent)->qmblk;
+set:
+	if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
+		goto start;
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_DET);
+	return;
+
+fail:
+	{
+		struct timeval tv, tvo;
+		do_gettimeofday(&tv);
+		memcpy(&tvo, __VZ_QUOTA_TSTAMP(inode->i_sb), sizeof(tvo));
+		tv.tv_sec -= tvo.tv_sec;
+		if (tv.tv_usec < tvo.tv_usec) {
+			tv.tv_sec--;
+			tv.tv_usec += USEC_PER_SEC - tvo.tv_usec;
+		} else
+			tv.tv_usec -= tvo.tv_usec;
+		if (tv.tv_sec < timeout)
+			goto set;
+		printk(KERN_ERR "VZDQ: %s, orig {%u, %u},"
+			" dev %s, inode %lu, fs %s\n",
+			msg,
+			INODE_QLNK(inode)->origin[0],
+			INODE_QLNK(inode)->origin[1],
+			inode->i_sb->s_id, inode->i_ino,
+			inode->i_sb->s_type->name);
+		printk(KERN_ERR "i_count %u, ", atomic_read(&inode->i_count));
+		printk(KERN_ERR "i_mode %o, ", inode->i_mode);
+		printk(KERN_ERR "i_state %lx, ", inode->i_state);
+		printk(KERN_ERR "i_flags %x\n", inode->i_flags);
+		printk(KERN_ERR "i_op %p, vfs_empty_iops %p, "
+				"i_fop %p, i_mapping %p\n",
+				inode->i_op, &vfs_empty_iops,
+				inode->i_fop, inode->i_mapping);
+		if (!cnt++) {
+			printk(KERN_ERR "current %d (%s), VE %d,"
+				" time %ld.%06ld\n",
+				current->pid, current->comm,
+				VEID(get_exec_env()),
+				tv.tv_sec, tv.tv_usec);
+			dump_stack();
+		}
+		if (parent != NULL)
+			printk(KERN_ERR "VZDQ: parent of %lu is %lu\n",
+				inode->i_ino, parent->i_ino);
+	}
+	goto set;
+}
+
+static void vzquota_inode_qmblk_recalc(struct inode *inode,
+		struct vz_quota_ilink *qlnk)
+{
+	spin_lock(&dcache_lock);
+	if (!list_empty(&inode->i_dentry))
+		vzquota_dtree_qmblk_recalc(inode, qlnk);
+	else
+		vzquota_det_qmblk_recalc(inode, qlnk);
+	spin_unlock(&dcache_lock);
+}
+
+/**
+ * vzquota_inode_qmblk - obtain inode's qmblk
+ *
+ * Returns qmblk with refcounter taken, %NULL if not under
+ * VZ quota or %VZ_QUOTA_BAD.
+ *
+ * FIXME: This function should be removed when vzquota_find_qmblk /
+ * get_quota_root / vzquota_dstat code is cleaned up.
+ */
+struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ilink qlnk;
+
+	might_sleep();
+
+	if (inode->i_sb->dq_op != &vz_quota_operations)
+		return NULL;
+#if defined(VZ_QUOTA_UNLOAD)
+#error Make sure qmblk does not disappear
+#endif
+
+	vzquota_qlnk_init(&qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+
+	if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
+	    !VZ_QUOTA_IS_ACTUAL(inode))
+		vzquota_inode_qmblk_recalc(inode, &qlnk);
+
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk != VZ_QUOTA_BAD) {
+		if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb))
+			qmblk_get(qmblk);
+		else
+			qmblk = NULL;
+	}
+
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&qlnk);
+	return qmblk;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Calls from quota operations
+ *
+ * --------------------------------------------------------------------- */
+
+/**
+ * vzquota_inode_init_call - call from DQUOT_INIT
+ */
+void vzquota_inode_init_call(struct inode *inode)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	/* initializes inode's quota inside */
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		vzquota_data_unlock(inode, &data);
+
+	/*
+	 * The check is needed for repeated new_inode() calls from a single
+	 * ext3 call like create or mkdir in case of -ENOSPC.
+	 */
+	spin_lock(&dcache_lock);
+	if (!list_empty(&inode->i_dentry))
+		vzquota_cur_qmblk_set(inode);
+	spin_unlock(&dcache_lock);
+}
+
+/**
+ * vzquota_inode_drop_call - call from DQUOT_DROP
+ */
+void vzquota_inode_drop_call(struct inode *inode)
+{
+	vzquota_inode_drop(inode);
+}
+
+/**
+ * vzquota_inode_data - initialize (if nec.) and lock inode quota ptrs
+ * @inode: the inode
+ * @data: storage space
+ *
+ * Returns: qmblk is NULL or VZ_QUOTA_BAD or actualized qmblk.
+ * On return if qmblk is neither NULL nor VZ_QUOTA_BAD:
+ *   qmblk in inode's qlnk is the same as returned,
+ *   ugid pointers inside inode's qlnk are valid,
+ *   some locks are taken (and should be released by vzquota_data_unlock).
+ * If qmblk is NULL or VZ_QUOTA_BAD, locks are NOT taken.
+ */
+struct vz_quota_master *vzquota_inode_data(struct inode *inode,
+		struct vz_quota_datast *data)
+{
+	struct vz_quota_master *qmblk;
+
+	might_sleep();
+
+	vzquota_qlnk_init(&data->qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	if (unlikely(inode->i_flags & S_NOQUOTA)) {
+		inode_qmblk_unlock(inode->i_sb);
+		return NULL;
+	}
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+
+	if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
+	    !VZ_QUOTA_IS_ACTUAL(inode))
+		vzquota_inode_qmblk_recalc(inode, &data->qlnk);
+
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk != VZ_QUOTA_BAD) {
+		if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb)) {
+			/*
+			 * Note that in the current implementation,
+			 * inode_qmblk_lock can theoretically be dropped here.
+			 * This place is serialized with quota_off because
+			 * quota_off fails when there are extra dentry
+			 * references and syncs inodes before removing quota
+			 * information from them.
+			 * However, quota usage information should stop being
+			 * updated immediately after vzquota_off.
+			 */
+			qmblk_data_write_lock(qmblk);
+		} else {
+			inode_qmblk_unlock(inode->i_sb);
+			qmblk = NULL;
+		}
+	} else {
+		inode_qmblk_unlock(inode->i_sb);
+	}
+	return qmblk;
+}
+
+void vzquota_data_unlock(struct inode *inode,
+		struct vz_quota_datast *data)
+{
+	qmblk_data_write_unlock(INODE_QLNK(inode)->qmblk);
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&data->qlnk);
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+/**
+ * vzquota_inode_transfer_call - call from vzquota_transfer
+ */
+int vzquota_inode_transfer_call(struct inode *inode, struct iattr *iattr)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+	struct vz_quota_ilink qlnew;
+	int mask;
+	int ret;
+
+	might_sleep();
+	vzquota_qlnk_init(&qlnew);
+start:
+	qmblk = vzquota_inode_data(inode, &data);
+	ret = NO_QUOTA;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out_destr;
+	ret = QUOTA_OK;
+	if (qmblk == NULL)
+		goto out_destr;
+	qmblk_get(qmblk);
+
+	ret = QUOTA_OK;
+	if (!(qmblk->dq_flags & VZDQUG_ON))
+		/* no ugid quotas */
+		goto out_unlock;
+
+	mask = 0;
+	if ((iattr->ia_valid & ATTR_UID) && iattr->ia_uid != inode->i_uid)
+		mask |= 1 << USRQUOTA;
+	if ((iattr->ia_valid & ATTR_GID) && iattr->ia_gid != inode->i_gid)
+		mask |= 1 << GRPQUOTA;
+	while (1) {
+		if (vzquota_qlnk_is_empty(&qlnew) &&
+		    vzquota_qlnk_fill_attr(&qlnew, inode, iattr, mask, qmblk))
+			break;
+		if (qlnew.qmblk == INODE_QLNK(inode)->qmblk &&
+		    qlnew.qmblk == qmblk)
+			goto finish;
+		if (vzquota_qlnk_reinit_attr(&qlnew, inode, qmblk))
+			break;
+	}
+
+	/* prepare for restart */
+	vzquota_data_unlock(inode, &data);
+	qmblk_put(qmblk);
+	goto start;
+
+finish:
+	/* all references obtained successfully */
+	ret = vzquota_transfer_usage(inode, mask, &qlnew);
+	if (!ret) {
+		vzquota_qlnk_swap(&qlnew, INODE_QLNK(inode));
+		set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_TRANS);
+	}
+out_unlock:
+	vzquota_data_unlock(inode, &data);
+	qmblk_put(qmblk);
+out_destr:
+	vzquota_qlnk_destroy(&qlnew);
+	return ret;
+}
+#endif
+
+int vzquota_rename_check(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ilink qlnk1, qlnk2;
+	int c, ret;
+
+	if (inode->i_sb != old_dir->i_sb || inode->i_sb != new_dir->i_sb)
+		return -1;
+
+	might_sleep();
+
+	vzquota_qlnk_init(&qlnk1);
+	vzquota_qlnk_init(&qlnk2);
+	inode_qmblk_lock(inode->i_sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+	__vzquota_inode_init(old_dir, VZ_QUOTAO_INICAL);
+	__vzquota_inode_init(new_dir, VZ_QUOTAO_INICAL);
+
+	do {
+		c = 0;
+		if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
+		    !VZ_QUOTA_IS_ACTUAL(inode)) {
+			vzquota_inode_qmblk_recalc(inode, &qlnk1);
+			c++;
+		}
+		if (vzquota_qlnk_is_empty(INODE_QLNK(new_dir)) ||
+		    !VZ_QUOTA_IS_ACTUAL(new_dir)) {
+			vzquota_inode_qmblk_recalc(new_dir, &qlnk2);
+			c++;
+		}
+	} while (c);
+
+	ret = 0;
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk != INODE_QLNK(new_dir)->qmblk) {
+		ret = -1;
+		if (qmblk != VZ_QUOTA_BAD &&
+		    !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
+		    qmblk->dq_root_dentry->d_inode == inode &&
+		    VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(new_dir)->qmblk,
+			    				inode->i_sb) &&
+		    VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(old_dir)->qmblk,
+			    				inode->i_sb))
+			/* quota root rename is allowed */
+			ret = 0;
+	}
+
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&qlnk2);
+	vzquota_qlnk_destroy(&qlnk1);
+	return ret;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * qmblk-related parts of on/off operations
+ *
+ * --------------------------------------------------------------------- */
+
+/**
+ * vzquota_check_dtree - check dentry tree if quota on/off is allowed
+ *
+ * This function doesn't allow quota to be turned on/off if some dentries in
+ * the tree have external references.
+ * In addition to technical reasons, it enforces user-space correctness:
+ * current usage (taken from or reported to the user space) can be meaningful
+ * and accurate only if the tree is not being modified.
+ * Side effect: additional vfsmount structures referencing the tree (bind
+ * mounts of tree nodes to some other places) are not allowed at on/off time.
+ */
+int vzquota_check_dtree(struct vz_quota_master *qmblk, int off)
+{
+	struct dentry *dentry;
+	int err, count;
+
+	err = -EBUSY;
+	dentry = qmblk->dq_root_dentry;
+
+	if (d_unhashed(dentry) && dentry != dentry->d_sb->s_root)
+		goto unhashed;
+
+	/* attempt to shrink */
+  	if (!list_empty(&dentry->d_subdirs)) {
+		spin_unlock(&dcache_lock);
+		inode_qmblk_unlock(dentry->d_sb);
+		shrink_dcache_parent(dentry);
+		inode_qmblk_lock(dentry->d_sb);
+		spin_lock(&dcache_lock);
+		if (!list_empty(&dentry->d_subdirs))
+			goto out;
+
+		count = 1;
+		if (dentry == dentry->d_sb->s_root)
+			count += 2;	/* sb and mnt refs */
+		if (atomic_read(&dentry->d_count) < count) {
+			printk(KERN_ERR "%s: too small count %d vs %d.\n",
+					__FUNCTION__,
+					atomic_read(&dentry->d_count), count);
+			goto out;
+		}
+		if (atomic_read(&dentry->d_count) > count)
+			goto out;
+	}
+
+	err = 0;
+out:
+	return err;
+
+unhashed:
+	/*
+	 * Quota root is removed.
+	 * Allow to turn quota off, but not on.
+	 */
+	if (off)
+		err = 0;
+	goto out;
+}
+
+int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
+		struct vz_quota_master *qmblk)
+{
+	struct vz_quota_ilink qlnk;
+	struct vz_quota_master *qold, *qnew;
+	int err;
+
+	might_sleep();
+
+	qold = NULL;
+	qnew = vzquota_alloc_fake();
+	if (qnew == NULL)
+		return -ENOMEM;
+
+	vzquota_qlnk_init(&qlnk);
+	inode_qmblk_lock(sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+
+	spin_lock(&dcache_lock);
+	while (1) {
+		err = vzquota_check_dtree(qmblk, 0);
+		if (err)
+			break;
+		if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk))
+			break;
+	}
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_ON);
+	spin_unlock(&dcache_lock);
+
+	if (!err) {
+		qold = __VZ_QUOTA_NOQUOTA(sb);
+		qold->dq_flags |= VZDQ_NOACT;
+		__VZ_QUOTA_NOQUOTA(sb) = qnew;
+	}
+
+	inode_qmblk_unlock(sb);
+	vzquota_qlnk_destroy(&qlnk);
+	if (qold != NULL)
+		qmblk_put(qold);
+
+	return err;
+}
+
+int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk)
+{
+	int ret;
+
+	ret = 0;
+	inode_qmblk_lock(sb);
+
+	spin_lock(&dcache_lock);
+	if (vzquota_check_dtree(qmblk, 1))
+		ret = -EBUSY;
+	spin_unlock(&dcache_lock);
+
+	if (!ret)
+		qmblk->dq_flags |= VZDQ_NOACT | VZDQ_NOQUOT;
+	inode_qmblk_unlock(sb);
+	return ret;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * External interfaces
+ *
+ * ---------------------------------------------------------------------*/
+
+static int vzquota_ioctl(struct inode *ino, struct file *file,
+		unsigned int cmd, unsigned long arg)
+{
+	int err;
+	struct vzctl_quotactl qb;
+	struct vzctl_quotaugidctl qub;
+
+	switch (cmd) {
+		case VZCTL_QUOTA_CTL:
+			err = -ENOTTY;
+			break;
+		case VZCTL_QUOTA_NEW_CTL:
+			err = -EFAULT;
+			if (copy_from_user(&qb, (void *)arg, sizeof(qb)))
+				break;
+			err = do_vzquotactl(qb.cmd, qb.quota_id,
+					qb.qstat, qb.ve_root);
+			break;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		case VZCTL_QUOTA_UGID_CTL:
+			err = -EFAULT;
+			if (copy_from_user(&qub, (void *)arg, sizeof(qub)))
+				break;
+			err = do_vzquotaugidctl(&qub);
+			break;
+#endif
+		default:
+			err = -ENOTTY;
+	}
+	might_sleep(); /* debug */
+	return err;
+}
+
+static struct vzioctlinfo vzdqcalls = {
+	.type	= VZDQCTLTYPE,
+	.func	= vzquota_ioctl,
+	.owner	= THIS_MODULE,
+};
+
+/**
+ * vzquota_dstat - get quota usage info for virtual superblock
+ */
+static int vzquota_dstat(struct super_block *super, struct dq_stat *qstat)
+{
+	struct vz_quota_master *qmblk;
+
+	qmblk = vzquota_find_qmblk(super);
+	if (qmblk == NULL)
+		return -ENOENT;
+	if (qmblk == VZ_QUOTA_BAD) {
+		memset(qstat, 0, sizeof(*qstat));
+		return 0;
+	}
+
+	qmblk_data_read_lock(qmblk);
+	memcpy(qstat, &qmblk->dq_stat, sizeof(*qstat));
+	qmblk_data_read_unlock(qmblk);
+	qmblk_put(qmblk);
+	return 0;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Init/exit helpers
+ *
+ * ---------------------------------------------------------------------*/
+
+static int vzquota_cache_init(void)
+{
+	int i;
+
+	vzquota_cachep = kmem_cache_create("vz_quota_master",
+					 sizeof(struct vz_quota_master),
+					 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (vzquota_cachep == NULL) {
+		printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
+		goto nomem2;
+	}
+	for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&vzquota_hash_table[i]);
+
+	return 0;
+
+nomem2:
+	return -ENOMEM;
+}
+
+static void vzquota_cache_release(void)
+{
+	int i;
+
+	/* sanity check */
+	for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++)
+		if (!list_empty(&vzquota_hash_table[i]))
+			BUG();
+
+	/* release caches */
+	if (kmem_cache_destroy(vzquota_cachep))
+		printk(KERN_ERR
+			"VZQUOTA: vz_quota_master kmem_cache_destroy failed\n");
+	vzquota_cachep = NULL;
+}
+
+static int quota_notifier_call(struct vnotifier_block *self,
+		unsigned long n, void *data, int err)
+{
+	struct virt_info_quota *viq;
+	struct super_block *sb;
+
+	viq = (struct virt_info_quota *)data;
+	switch (n) {
+	case VIRTINFO_QUOTA_ON:
+		err = NOTIFY_BAD;
+		if (!try_module_get(THIS_MODULE))
+			break;
+		sb = viq->super;
+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+		err = NOTIFY_OK;
+		break;
+	case VIRTINFO_QUOTA_OFF:
+		module_put(THIS_MODULE);
+		err = NOTIFY_OK;
+		break;
+	case VIRTINFO_QUOTA_GETSTAT:
+		err = NOTIFY_BAD;
+		if (vzquota_dstat(viq->super, viq->qstat))
+			break;
+		err = NOTIFY_OK;
+		break;
+	case VIRTINFO_QUOTA_DISABLE:
+		err = NOTIFY_OK;
+		vzquota_inode_off((struct inode *)data);
+		break;
+	}
+	return err;
+}
+
+struct vnotifier_block quota_notifier_block = {
+	.notifier_call = quota_notifier_call,
+	.priority = INT_MAX,
+};
+
+/* ----------------------------------------------------------------------
+ *
+ * Init/exit procedures
+ *
+ * ---------------------------------------------------------------------*/
+
+static int __init vzquota_init(void)
+{
+	int err;
+
+	if ((err = vzquota_cache_init()) != 0)
+		goto out_cache;
+
+	if ((err = vzquota_proc_init()) != 0)
+		goto out_proc;
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+	if ((err = vzquota_ugid_init()) != 0)
+		goto out_ugid;
+#endif
+
+	init_MUTEX(&vz_quota_sem);
+	vzioctl_register(&vzdqcalls);
+	virtinfo_notifier_register(VITYPE_QUOTA, &quota_notifier_block);
+#if defined(CONFIG_VZ_QUOTA_UGID) && defined(CONFIG_PROC_FS)
+	vzaquota_init();
+#endif
+
+	return 0;
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+out_ugid:
+	vzquota_proc_release();
+#endif
+out_proc:
+	vzquota_cache_release();
+out_cache:
+	return err;
+}
+
+#if defined(VZ_QUOTA_UNLOAD)
+static void __exit vzquota_release(void)
+{
+	virtinfo_notifier_unregister(VITYPE_QUOTA, &quota_notifier_block);
+	vzioctl_unregister(&vzdqcalls);
+#ifdef CONFIG_VZ_QUOTA_UGID
+#ifdef CONFIG_PROC_FS
+	vzaquota_fini();
+#endif
+	vzquota_ugid_release();
+#endif
+	vzquota_proc_release();
+	vzquota_cache_release();
+}
+#endif
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Disk Quota");
+MODULE_LICENSE("GPL v2");
+
+module_init(vzquota_init)
+#if defined(VZ_QUOTA_UNLOAD)
+module_exit(vzquota_release)
+#endif
diff -Nurap linux-2.6.9-100.orig/fs/xfs/linux-2.6/xfs_buf.c linux-2.6.9-ve023stab054/fs/xfs/linux-2.6/xfs_buf.c
--- linux-2.6.9-100.orig/fs/xfs/linux-2.6/xfs_buf.c	2004-10-19 01:53:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/xfs/linux-2.6/xfs_buf.c	2011-06-15 19:26:18.000000000 +0400
@@ -1666,8 +1666,8 @@ pagebuf_daemon(
 	INIT_LIST_HEAD(&tmp);
 	do {
 		/* swsusp */
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);
diff -Nurap linux-2.6.9-100.orig/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.9-ve023stab054/fs/xfs/linux-2.6/xfs_iops.c
--- linux-2.6.9-100.orig/fs/xfs/linux-2.6/xfs_iops.c	2004-10-19 01:53:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/xfs/linux-2.6/xfs_iops.c	2011-06-15 19:26:19.000000000 +0400
@@ -464,7 +464,8 @@ STATIC int
 linvfs_permission(
 	struct inode	*inode,
 	int		mode,
-	struct nameidata *nd)
+	struct nameidata *nd,
+	struct exec_perm *exec_perm)
 {
 	vnode_t		*vp = LINVFS_GET_VP(inode);
 	int		error;
diff -Nurap linux-2.6.9-100.orig/fs/xfs/linux-2.6/xfs_super.c linux-2.6.9-ve023stab054/fs/xfs/linux-2.6/xfs_super.c
--- linux-2.6.9-100.orig/fs/xfs/linux-2.6/xfs_super.c	2004-10-19 01:55:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/fs/xfs/linux-2.6/xfs_super.c	2011-06-15 19:26:18.000000000 +0400
@@ -485,8 +485,8 @@ xfssyncd(
 		set_current_state(TASK_INTERRUPTIBLE);
 		timeleft = schedule_timeout(timeleft);
 		/* swsusp */
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
 		if (vfsp->vfs_flag & VFS_UMOUNT)
 			break;
 
diff -Nurap linux-2.6.9-100.orig/include/asm-alpha/thread_info.h linux-2.6.9-ve023stab054/include/asm-alpha/thread_info.h
--- linux-2.6.9-100.orig/include/asm-alpha/thread_info.h	2004-10-19 01:55:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-alpha/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -77,6 +77,7 @@ register struct thread_info *__current_t
 #define TIF_UAC_NOPRINT		6	/* see sysinfo.h */
 #define TIF_UAC_NOFIX		7
 #define TIF_UAC_SIGBUS		8
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
diff -Nurap linux-2.6.9-100.orig/include/asm-arm/thread_info.h linux-2.6.9-ve023stab054/include/asm-arm/thread_info.h
--- linux-2.6.9-100.orig/include/asm-arm/thread_info.h	2004-10-19 01:54:08.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-arm/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -128,6 +128,7 @@ extern void iwmmxt_task_release(struct t
 #define TIF_SYSCALL_TRACE	8
 #define TIF_POLLING_NRFLAG	16
 #define TIF_USING_IWMMXT	17
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
diff -Nurap linux-2.6.9-100.orig/include/asm-arm26/thread_info.h linux-2.6.9-ve023stab054/include/asm-arm26/thread_info.h
--- linux-2.6.9-100.orig/include/asm-arm26/thread_info.h	2004-10-19 01:53:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-arm26/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -125,6 +125,7 @@ extern void free_thread_info(struct thre
 #define TIF_SYSCALL_TRACE	8
 #define TIF_USED_FPU		16
 #define TIF_POLLING_NRFLAG	17
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
diff -Nurap linux-2.6.9-100.orig/include/asm-cris/thread_info.h linux-2.6.9-ve023stab054/include/asm-cris/thread_info.h
--- linux-2.6.9-100.orig/include/asm-cris/thread_info.h	2004-10-19 01:55:28.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-cris/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -85,6 +85,7 @@ struct thread_info {
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
diff -Nurap linux-2.6.9-100.orig/include/asm-generic/tlb.h linux-2.6.9-ve023stab054/include/asm-generic/tlb.h
--- linux-2.6.9-100.orig/include/asm-generic/tlb.h	2004-10-19 01:53:05.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-generic/tlb.h	2011-06-15 19:26:19.000000000 +0400
@@ -110,6 +110,9 @@ tlb_is_full_mm(struct mmu_gather *tlb)
  *	handling the additional races in SMP caused by other CPUs caching valid
  *	mappings in their TLBs.
  */
+#include <ub/ub_mem.h>
+#include <ub/ub_vmpages.h>
+
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	tlb->need_flush = 1;
diff -Nurap linux-2.6.9-100.orig/include/asm-h8300/thread_info.h linux-2.6.9-ve023stab054/include/asm-h8300/thread_info.h
--- linux-2.6.9-100.orig/include/asm-h8300/thread_info.h	2004-10-19 01:55:36.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-h8300/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -93,6 +93,7 @@ static inline struct thread_info *curren
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_POLLING_NRFLAG	4	/* true if poll_idle() is polling
 					   TIF_NEED_RESCHED */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/a.out.h linux-2.6.9-ve023stab054/include/asm-i386/a.out.h
--- linux-2.6.9-100.orig/include/asm-i386/a.out.h	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/a.out.h	2011-06-15 19:26:19.000000000 +0400
@@ -19,7 +19,7 @@ struct exec
 
 #ifdef __KERNEL__
 
-#define STACK_TOP	TASK_SIZE
+#define STACK_TOP	(TASK_SIZE - PAGE_SIZE * 2)
 
 #endif
 
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/bug.h linux-2.6.9-ve023stab054/include/asm-i386/bug.h
--- linux-2.6.9-100.orig/include/asm-i386/bug.h	2004-10-19 01:53:22.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/bug.h	2011-06-15 19:26:18.000000000 +0400
@@ -12,7 +12,10 @@
 #if 1	/* Set to zero for a slightly smaller kernel */
 #define BUG()				\
  __asm__ __volatile__(	"ud2\n"		\
+		 	"\t.byte 0x66\n"\
+		 	"\t.byte 0xb8\n" /* mov $xxx, %ax */\
 			"\t.word %c0\n"	\
+			"\t.byte 0xb8\n" /* mov $xxx, %eax */\
 			"\t.long %c1\n"	\
 			 : : "i" (__LINE__), "i" (__FILE__))
 #else
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/elf.h linux-2.6.9-ve023stab054/include/asm-i386/elf.h
--- linux-2.6.9-100.orig/include/asm-i386/elf.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/elf.h	2011-06-15 19:26:20.000000000 +0400
@@ -108,7 +108,7 @@ typedef struct user_fxsr_struct elf_fpxr
    For the moment, we have only optimizations for the Intel generations,
    but that could change... */
 
-#define ELF_PLATFORM  (system_utsname.machine)
+#define ELF_PLATFORM  (ve_utsname.machine)
 
 /*
  * Architecture-neutral AT_ values in 0-17, leave some room
@@ -146,12 +146,24 @@ extern void __kernel_vsyscall;
 
 #define ARCH_DLINFO							\
 do {									\
-	if (VSYSCALL_BASE) {						\
+	if (sysctl_at_vsyscall && VSYSCALL_BASE) {			\
 		NEW_AUX_ENT(AT_SYSINFO,	VSYSCALL_ENTRY);		\
 		NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE);		\
 	}								\
 } while (0)
 
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+	int executable_stack, unsigned long map_address);
+
+#if 0	/* Disabled for exec-shield, where a normal vma holds the vDSO.  */
+/*
+ * this doesn't work with CPT.
+ * Process migrated from i686 can have vsyscall page at arbitrary
+ * address due to execshield.  -- dev@
+ */
+
 /*
  * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
  * extra segments containing the vsyscall DSO contents.  Dumping its
@@ -160,8 +172,8 @@ do {									\
  * Dumping its extra ELF program headers includes all the other information
  * a debugger needs to easily find how the vsyscall DSO was being used.
  */
-#define ELF_CORE_EXTRA_PHDRS		(__VSYSCALL_EHDR->e_phnum)
-#define ELF_CORE_WRITE_EXTRA_PHDRS					      \
+#define DO_ELF_CORE_EXTRA_PHDRS		(__VSYSCALL_EHDR->e_phnum)
+#define DO_ELF_CORE_WRITE_EXTRA_PHDRS					      \
 do {									      \
 	const struct elf_phdr *const vsyscall_phdrs =			      \
 		(const struct elf_phdr *) (__VSYSCALL_BASE		      \
@@ -183,7 +195,7 @@ do {									      \
 		DUMP_WRITE(&phdr, sizeof(phdr));			      \
 	}								      \
 } while (0)
-#define ELF_CORE_WRITE_EXTRA_DATA					      \
+#define DO_ELF_CORE_WRITE_EXTRA_DATA					      \
 do {									      \
 	const struct elf_phdr *const vsyscall_phdrs =			      \
 		(const struct elf_phdr *) (__VSYSCALL_BASE		      \
@@ -196,12 +208,22 @@ do {									      \
 	}								      \
 } while (0)
 
+#define ELF_CORE_EXTRA_PHDRS		({ (sysctl_vsyscall32 != 0 ? \
+		DO_ELF_CORE_EXTRA_PHDRS : 0); })
+
+#define ELF_CORE_WRITE_EXTRA_PHDRS	do {		\
+		if (sysctl_vsyscall32 != 0)		\
+			DO_ELF_CORE_WRITE_EXTRA_PHDRS;	\
+	} while (0)
+
+#define ELF_CORE_WRITE_EXTRA_DATA	do {		\
+		if (sysctl_vsyscall32 != 0)		\
+			DO_ELF_CORE_WRITE_EXTRA_DATA;	\
+	} while (0)
+#endif  /* #if 0 */
 #endif
 
 #define __HAVE_ARCH_RANDOMIZE_BRK
 extern void randomize_brk(unsigned long old_brk);
 
-#define __HAVE_ARCH_VSYSCALL
-extern void map_vsyscall(void);
-
 #endif
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/fixmap.h linux-2.6.9-ve023stab054/include/asm-i386/fixmap.h
--- linux-2.6.9-100.orig/include/asm-i386/fixmap.h	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/fixmap.h	2011-06-15 19:26:19.000000000 +0400
@@ -20,7 +20,6 @@
  * Leave one empty page between vmalloc'ed areas and
  * the start of the fixmap.
  */
-#define __FIXADDR_TOP	0xfffff000
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
@@ -30,6 +29,8 @@
 #include <linux/threads.h>
 #include <asm/kmap_types.h>
 
+#define __FIXADDR_TOP (0xfffff000UL)
+
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
@@ -55,6 +56,10 @@
  * future, say framebuffers for the console driver(s) could be
  * fix-mapped?
  */
+
+#define TSS_SIZE	sizeof(struct tss_struct)
+#define FIX_TSS_COUNT	((TSS_SIZE * NR_CPUS + PAGE_SIZE - 1)/ PAGE_SIZE)
+
 enum fixed_addresses {
 	FIX_HOLE,
 	FIX_VSYSCALL,
@@ -76,17 +81,18 @@ enum fixed_addresses {
 	FIX_IDT,
 	FIX_GDT_1,
 	FIX_GDT_0,
-	FIX_TSS_3,
-	FIX_TSS_2,
-	FIX_TSS_1,
-	FIX_TSS_0,
+	FIX_TSS_LAST,
+	FIX_TSS_0 = FIX_TSS_LAST + FIX_TSS_COUNT - 1,
 	FIX_ENTRY_TRAMPOLINE_1,
 	FIX_ENTRY_TRAMPOLINE_0,
 #ifdef CONFIG_X86_CYCLONE_TIMER
 	FIX_CYCLONE_TIMER, /*cyclone timer register*/
 	FIX_VSTACK_HOLE_2,
 #endif 
-	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
+	/* reserved pte's for temporary kernel mappings */
+	__FIX_KMAP_BEGIN,
+	FIX_KMAP_BEGIN = __FIX_KMAP_BEGIN + (__FIX_KMAP_BEGIN & 1) +
+		((__FIXADDR_TOP >> PAGE_SHIFT) & 1),
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
 #ifdef CONFIG_ACPI_BOOT
 	FIX_ACPI_BEGIN,
@@ -124,7 +130,7 @@ extern void __set_fixmap (enum fixed_add
 
  /* IMPORTANT: we have to align FIXADDR_TOP so that the virtual stack */
  /* is THREAD_SIZE aligned. */
-#define FIXADDR_TOP	(((unsigned long)__FIXADDR_TOP) & ~(THREAD_SIZE-1))
+#define FIXADDR_TOP	__FIXADDR_TOP
 
 #define __FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START	(FIXADDR_TOP - __FIXADDR_SIZE)
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/kmap_types.h linux-2.6.9-ve023stab054/include/asm-i386/kmap_types.h
--- linux-2.6.9-100.orig/include/asm-i386/kmap_types.h	2011-06-09 19:22:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/kmap_types.h	2011-06-15 19:26:19.000000000 +0400
@@ -11,7 +11,8 @@ enum km_type {
 	 */
 	KM_BOUNCE_READ,
 	KM_VSTACK_BASE,
-	KM_VSTACK_TOP = KM_VSTACK_BASE + STACK_PAGE_COUNT-1,
+	__KM_VSTACK_TOP = KM_VSTACK_BASE + STACK_PAGE_COUNT-1,
+	KM_VSTACK_TOP = __KM_VSTACK_TOP + (__KM_VSTACK_TOP % 2),
 
 	KM_LDT_PAGE15,
 	KM_LDT_PAGE0 = KM_LDT_PAGE15 + 16-1,
@@ -31,7 +32,8 @@ enum km_type {
 	KM_SOFTIRQ1,
 	KM_CRASHDUMP,
 	KM_UNUSED,
-	KM_TYPE_NR
+	__KM_TYPE_NR,
+	KM_TYPE_NR=__KM_TYPE_NR + (__KM_TYPE_NR % 2)
 };
 
 #endif
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/mman.h linux-2.6.9-ve023stab054/include/asm-i386/mman.h
--- linux-2.6.9-100.orig/include/asm-i386/mman.h	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/mman.h	2011-06-15 19:26:19.000000000 +0400
@@ -22,6 +22,7 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO	0x80000		/* map from exec - try not to fail */
 
 #define MS_ASYNC	1		/* sync memory asynchronously */
 #define MS_INVALIDATE	2		/* invalidate the caches */
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/nmi.h linux-2.6.9-ve023stab054/include/asm-i386/nmi.h
--- linux-2.6.9-100.orig/include/asm-i386/nmi.h	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/nmi.h	2011-06-15 19:26:22.000000000 +0400
@@ -17,6 +17,7 @@ typedef int (*nmi_callback_t)(struct pt_
  * set. Return 1 if the NMI was handled.
  */
 void set_nmi_callback(nmi_callback_t callback);
+void set_nmi_ipi_callback(nmi_callback_t callback);
  
 /** 
  * unset_nmi_callback
@@ -24,6 +25,7 @@ void set_nmi_callback(nmi_callback_t cal
  * Remove the handler previously set.
  */
 void unset_nmi_callback(void);
+void unset_nmi_ipi_callback(void);
 
 /**
  * do_nmi_callback
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/page.h linux-2.6.9-ve023stab054/include/asm-i386/page.h
--- linux-2.6.9-100.orig/include/asm-i386/page.h	2011-06-09 19:22:45.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/page.h	2011-06-15 19:26:19.000000000 +0400
@@ -101,7 +101,7 @@ typedef struct { unsigned long pgprot; }
 
 #ifdef CONFIG_X86_4G_VM_LAYOUT
 #define __PAGE_OFFSET		(0x02000000)
-#define TASK_SIZE		((current->personality & 0x8000000) ? 0xc0000000 : 0xff000000)
+#define TASK_SIZE		(0xc0000000)
 #else
 #define __PAGE_OFFSET		(0xc0000000)
 #define TASK_SIZE		(0xc0000000)
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/processor.h linux-2.6.9-ve023stab054/include/asm-i386/processor.h
--- linux-2.6.9-100.orig/include/asm-i386/processor.h	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/processor.h	2011-06-15 19:26:19.000000000 +0400
@@ -84,8 +84,6 @@ struct cpuinfo_x86 {
 
 extern struct cpuinfo_x86 boot_cpu_data;
 extern struct cpuinfo_x86 new_cpu_data;
-extern struct tss_struct init_tss[NR_CPUS];
-extern struct tss_struct doublefault_tss;
 
 #ifdef CONFIG_SMP
 extern struct cpuinfo_x86 cpu_data[];
@@ -319,7 +317,6 @@ extern unsigned long arch_align_stack(un
 #define IO_BITMAP_BITS  1024
 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
 #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
 #define INVALID_IO_BITMAP_OFFSET 0x8000
 #define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
 
@@ -425,16 +422,14 @@ struct tss_struct {
 
 #define ARCH_MIN_TASKALIGN	16
 
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
 
-#define STACK_PAGE_COUNT	(4096/PAGE_SIZE)
-
-
-
+extern struct tss_struct init_tss[NR_CPUS];
+extern struct tss_struct doublefault_tss;
 
 struct thread_struct {
 /* cached TLS descriptors. */
 	struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
-	void *stack_page[STACK_PAGE_COUNT];
 	unsigned long	esp0;
 	unsigned long	sysenter_cs;
 	unsigned long	eip;
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/spinlock.h linux-2.6.9-ve023stab054/include/asm-i386/spinlock.h
--- linux-2.6.9-100.orig/include/asm-i386/spinlock.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/spinlock.h	2011-06-15 19:26:22.000000000 +0400
@@ -86,7 +86,10 @@ typedef struct {
 static inline void _raw_spin_unlock(spinlock_t *lock)
 {
 #ifdef CONFIG_DEBUG_SPINLOCK
-	BUG_ON(lock->magic != SPINLOCK_MAGIC);
+	if (lock->magic != SPINLOCK_MAGIC) {
+		printk("lock %p magic %x\n", lock, lock->magic);
+		dump_stack();
+	}
 	BUG_ON(!spin_is_locked(lock));
 #endif
 	__asm__ __volatile__(
@@ -105,7 +108,10 @@ static inline void _raw_spin_unlock(spin
 {
 	char oldval = 1;
 #ifdef CONFIG_DEBUG_SPINLOCK
-	BUG_ON(lock->magic != SPINLOCK_MAGIC);
+	if (lock->magic != SPINLOCK_MAGIC) {
+		printk("lock %p magic %x\n", lock, lock->magic);
+		dump_stack();
+	}
 	BUG_ON(!spin_is_locked(lock));
 #endif
 	__asm__ __volatile__(
@@ -129,8 +135,8 @@ static inline void _raw_spin_lock(spinlo
 {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
-		printk("eip: %p\n", __builtin_return_address(0));
-		BUG();
+		printk("eip: %p, lock %p magic %x\n", __builtin_return_address(0), lock, lock->magic);
+		dump_stack();
 	}
 #endif
 	__asm__ __volatile__(
@@ -142,8 +148,8 @@ static inline void _raw_spin_lock_flags 
 {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
-		printk("eip: %p\n", __builtin_return_address(0));
-		BUG();
+		printk("eip: %p, magic %x\n", __builtin_return_address(0), lock->magic);
+		dump_stack();
 	}
 #endif
 	__asm__ __volatile__(
@@ -196,7 +202,10 @@ typedef struct {
 static inline void _raw_read_lock(rwlock_t *rw)
 {
 #ifdef CONFIG_DEBUG_SPINLOCK
-	BUG_ON(rw->magic != RWLOCK_MAGIC);
+	if (rw->magic != RWLOCK_MAGIC) {
+		printk("magic %x\n", rw->magic);
+		dump_stack();
+	}
 #endif
 	__build_read_lock(rw, "__read_lock_failed");
 }
@@ -204,7 +213,10 @@ static inline void _raw_read_lock(rwlock
 static inline void _raw_write_lock(rwlock_t *rw)
 {
 #ifdef CONFIG_DEBUG_SPINLOCK
-	BUG_ON(rw->magic != RWLOCK_MAGIC);
+	if (rw->magic != RWLOCK_MAGIC) {
+		printk("magic %x\n", rw->magic);
+		dump_stack();
+	}
 #endif
 	__build_write_lock(rw, "__write_lock_failed");
 }
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/string.h linux-2.6.9-ve023stab054/include/asm-i386/string.h
--- linux-2.6.9-100.orig/include/asm-i386/string.h	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/string.h	2011-06-15 19:26:18.000000000 +0400
@@ -140,7 +140,8 @@ __asm__ __volatile__(
 	"orb $1,%%al\n"
 	"3:"
 	:"=a" (__res), "=&S" (d0), "=&D" (d1)
-		     :"1" (cs),"2" (ct));
+	:"1" (cs),"2" (ct)
+	:"memory");
 return __res;
 }
 
@@ -162,8 +163,9 @@ __asm__ __volatile__(
 	"3:\tsbbl %%eax,%%eax\n\t"
 	"orb $1,%%al\n"
 	"4:"
-		     :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
-		     :"1" (cs),"2" (ct),"3" (count));
+	:"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+	:"1" (cs),"2" (ct),"3" (count)
+	:"memory");
 return __res;
 }
 
@@ -182,7 +184,9 @@ __asm__ __volatile__(
 	"movl $1,%1\n"
 	"2:\tmovl %1,%0\n\t"
 	"decl %0"
-	:"=a" (__res), "=&S" (d0) : "1" (s),"0" (c));
+	:"=a" (__res), "=&S" (d0)
+	:"1" (s),"0" (c)
+	:"memory");
 return __res;
 }
 
@@ -199,7 +203,9 @@ __asm__ __volatile__(
 	"leal -1(%%esi),%0\n"
 	"2:\ttestb %%al,%%al\n\t"
 	"jne 1b"
-	:"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c));
+	:"=g" (__res), "=&S" (d0), "=&a" (d1)
+	:"0" (0),"1" (s),"2" (c)
+	:"memory");
 return __res;
 }
 
@@ -215,7 +221,9 @@ __asm__ __volatile__(
 	"scasb\n\t"
 	"notl %0\n\t"
 	"decl %0"
-	:"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffffu));
+	:"=c" (__res), "=&D" (d0)
+	:"1" (s),"a" (0), "0" (0xffffffffu)
+	:"memory");
 return __res;
 }
 
@@ -326,7 +334,9 @@ __asm__ __volatile__(
 	"je 1f\n\t"
 	"movl $1,%0\n"
 	"1:\tdecl %0"
-	:"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count));
+	:"=D" (__res), "=&c" (d0)
+	:"a" (c),"0" (cs),"1" (count)
+	:"memory");
 return __res;
 }
 
@@ -362,7 +372,7 @@ __asm__ __volatile__(
 	"je 2f\n\t"
 	"stosb\n"
 	"2:"
-	: "=&c" (d0), "=&D" (d1)
+	:"=&c" (d0), "=&D" (d1)
 	:"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
 	:"memory");
 return (s);	
@@ -385,7 +395,8 @@ __asm__ __volatile__(
 	"jne 1b\n"
 	"3:\tsubl %2,%0"
 	:"=a" (__res), "=&d" (d0)
-	:"c" (s),"1" (count));
+	:"c" (s),"1" (count)
+	:"memory");
 return __res;
 }
 /* end of additional stuff */
@@ -466,7 +477,8 @@ static inline void * memscan(void * addr
 		"dec %%edi\n"
 		"1:"
 		: "=D" (addr), "=c" (size)
-		: "0" (addr), "1" (size), "a" (c));
+		: "0" (addr), "1" (size), "a" (c)
+		: "memory");
 	return addr;
 }
 
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/thread_info.h linux-2.6.9-ve023stab054/include/asm-i386/thread_info.h
--- linux-2.6.9-100.orig/include/asm-i386/thread_info.h	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -16,6 +16,15 @@
 #include <asm/processor.h>
 #endif
 
+#define PREEMPT_ACTIVE		0x4000000
+#ifdef CONFIG_4KSTACKS
+#define THREAD_SIZE		(4096)
+#else
+#define THREAD_SIZE		(8192)
+#endif
+#define STACK_PAGE_COUNT	(THREAD_SIZE/PAGE_SIZE)
+#define STACK_WARN             (THREAD_SIZE/8)
+
 /*
  * low level task data that entry.S needs immediate access to
  * - this struct should fit entirely inside of one cache line
@@ -39,6 +48,7 @@ struct thread_info {
 						*/
 	void			*sysenter_return;
 	void			*real_stack, *virtual_stack, *user_pgd;
+	void			*stack_page[STACK_PAGE_COUNT];
 	struct restart_block    restart_block;
 
 	unsigned long           previous_esp;   /* ESP of the previous stack in case
@@ -53,14 +63,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x4000000
-#ifdef CONFIG_4KSTACKS
-#define THREAD_SIZE            (4096)
-#else
-#define THREAD_SIZE		(8192)
-#endif
-
-#define STACK_WARN             (THREAD_SIZE/8)
 /*
  * macros/functions for gaining access to the thread information structure
  *
@@ -108,13 +110,13 @@ static inline unsigned long current_stac
 	({							\
 		struct thread_info *ret;			\
 								\
-		ret = kmalloc(THREAD_SIZE, GFP_KERNEL);		\
+		ret = kmalloc(THREAD_SIZE, GFP_KERNEL_UBC);	\
 		if (ret)					\
 			memset(ret, 0, THREAD_SIZE);		\
 		ret;						\
 	})
 #else
-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
+#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL_UBC)
 #endif
 
 #define free_thread_info(info)	kfree(info)
@@ -149,6 +151,8 @@ static inline unsigned long current_stac
 #define TIF_DB7			6	/* has debug registers */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_FREEZE		17	/* Freeze request, atomic version of PF_FREEZE */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/timex.h linux-2.6.9-ve023stab054/include/asm-i386/timex.h
--- linux-2.6.9-100.orig/include/asm-i386/timex.h	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/timex.h	2011-06-15 19:26:19.000000000 +0400
@@ -43,8 +43,7 @@ static inline cycles_t get_cycles (void)
 	unsigned long long ret=0;
 
 #ifndef CONFIG_X86_TSC
-	if (!cpu_has_tsc)
-		return 0;
+#error "CONFIG_X86_TSC is not set!"
 #endif
 
 #if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/uaccess.h linux-2.6.9-ve023stab054/include/asm-i386/uaccess.h
--- linux-2.6.9-100.orig/include/asm-i386/uaccess.h	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/uaccess.h	2011-06-15 19:26:19.000000000 +0400
@@ -156,18 +156,28 @@ extern int zero_user_size(unsigned int s
 extern int copy_str_fromuser_size(unsigned int size, void *val, const void *ptr);
 extern int strlen_fromuser_size(unsigned int size, const void *ptr);
 
-
+/*
+ * GCC 2.96 has stupid bug which forces us to use volatile or barrier below.
+ * without volatile or barrier compiler generates ABSOLUTELY wrong code which
+ * ignores XXX_size function return code, but generates EFAULT :)))
+ * the bug was found in sys_utime()
+ */
 # define indirect_get_user(x,ptr)					\
 ({	int __ret_gu,__val_gu;						\
 	__typeof__(ptr) __ptr_gu = (ptr);				\
 	__ret_gu = get_user_size(sizeof(*__ptr_gu), &__val_gu,__ptr_gu) ? -EFAULT : 0;\
+	barrier();							\
 	(x) = (__typeof__(*__ptr_gu))__val_gu;				\
 	__ret_gu;							\
 })
 #define indirect_put_user(x,ptr)					\
 ({									\
+	int __ret_pu;							\
 	__typeof__(*(ptr)) *__ptr_pu = (ptr), __x_pu = (x);		\
-	put_user_size(sizeof(*__ptr_pu), &__x_pu, __ptr_pu) ? -EFAULT : 0; \
+	__ret_pu = put_user_size(sizeof(*__ptr_pu),			\
+		&__x_pu, __ptr_pu) ? -EFAULT : 0;			\
+	barrier();							\
+	__ret_pu;							\
 })
 #define __indirect_put_user indirect_put_user
 #define __indirect_get_user indirect_get_user
diff -Nurap linux-2.6.9-100.orig/include/asm-i386/unistd.h linux-2.6.9-ve023stab054/include/asm-i386/unistd.h
--- linux-2.6.9-100.orig/include/asm-i386/unistd.h	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-i386/unistd.h	2011-06-15 19:26:22.000000000 +0400
@@ -293,8 +293,19 @@
 #define __NR_add_key		286
 #define __NR_request_key	287
 #define __NR_keyctl		288
-
-#define NR_syscalls 289
+#define __NR_fairsched_mknod	500     /* FairScheduler syscalls */
+#define __NR_fairsched_rmnod	501
+#define __NR_fairsched_chwt	502
+#define __NR_fairsched_mvpr	503
+#define __NR_fairsched_rate	504
+#define __NR_fairsched_vcpus	505
+#define __NR_getluid		510
+#define __NR_setluid		511
+#define __NR_setublimit		512
+#define __NR_ubstat		513
+#define __NR_lchmod		516
+#define __NR_lutime		517
+#define NR_syscalls 517
 
 #ifndef __KERNEL_SYSCALLS_NO_ERRNO__
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
diff -Nurap linux-2.6.9-100.orig/include/asm-ia64/mman.h linux-2.6.9-ve023stab054/include/asm-ia64/mman.h
--- linux-2.6.9-100.orig/include/asm-ia64/mman.h	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-ia64/mman.h	2011-06-15 19:26:19.000000000 +0400
@@ -30,6 +30,7 @@
 #define MAP_NORESERVE	0x04000		/* don't check for reservations */
 #define MAP_POPULATE	0x08000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO	0x80000		/* map from exec - try not to fail */
 
 #define MS_ASYNC	1		/* sync memory asynchronously */
 #define MS_INVALIDATE	2		/* invalidate the caches */
diff -Nurap linux-2.6.9-100.orig/include/asm-ia64/pgalloc.h linux-2.6.9-ve023stab054/include/asm-ia64/pgalloc.h
--- linux-2.6.9-100.orig/include/asm-ia64/pgalloc.h	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-ia64/pgalloc.h	2011-06-15 19:26:19.000000000 +0400
@@ -145,7 +145,8 @@ pmd_populate_kernel (struct mm_struct *m
 static inline struct page *
 pte_alloc_one (struct mm_struct *mm, unsigned long addr)
 {
-	struct page *pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+	struct page *pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|
+					__GFP_REPEAT, 0);
 
 	if (likely(pte != NULL))
 		clear_page(page_address(pte));
diff -Nurap linux-2.6.9-100.orig/include/asm-ia64/processor.h linux-2.6.9-ve023stab054/include/asm-ia64/processor.h
--- linux-2.6.9-100.orig/include/asm-ia64/processor.h	2011-06-09 19:22:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-ia64/processor.h	2011-06-15 19:26:19.000000000 +0400
@@ -323,7 +323,7 @@ struct thread_struct {
 	regs->loadrs = 0;									\
 	regs->r8 = current->mm->dumpable;	/* set "don't zap registers" flag */		\
 	regs->r12 = new_sp - 16;	/* allocate 16 byte scratch area */			\
-	if (unlikely(!current->mm->dumpable)) {							\
+	if (unlikely(!current->mm->dumpable || !current->mm->vps_dumpable)) {			\
 		/*										\
 		 * Zap scratch regs to avoid leaking bits between processes with different	\
 		 * uid/privileges.								\
diff -Nurap linux-2.6.9-100.orig/include/asm-ia64/system.h linux-2.6.9-ve023stab054/include/asm-ia64/system.h
--- linux-2.6.9-100.orig/include/asm-ia64/system.h	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-ia64/system.h	2011-06-15 19:26:19.000000000 +0400
@@ -279,7 +279,7 @@ do {						\
 	spin_lock(&(next)->switch_lock);	\
 	spin_unlock(&(rq)->lock);		\
 } while (0)
-#define finish_arch_switch(rq, prev)	spin_unlock_irq(&(prev)->switch_lock)
+#define finish_arch_switch(rq, prev)	spin_unlock(&(prev)->switch_lock)
 #define task_running(rq, p) 		((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
 
 #define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
diff -Nurap linux-2.6.9-100.orig/include/asm-ia64/thread_info.h linux-2.6.9-ve023stab054/include/asm-ia64/thread_info.h
--- linux-2.6.9-100.orig/include/asm-ia64/thread_info.h	2004-10-19 01:55:36.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-ia64/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -67,6 +67,8 @@ struct thread_info {
 #define TIF_SYSCALL_TRACE	3	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	4	/* syscall auditing active */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_FREEZE		17	/* Freeze request, atomic version of PF_FREEZE */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define TIF_WORK_MASK		0x7	/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE */
 #define TIF_ALLWORK_MASK	0x1f	/* bits 0..4 are "work to do on user-return" bits */
diff -Nurap linux-2.6.9-100.orig/include/asm-ia64/timex.h linux-2.6.9-ve023stab054/include/asm-ia64/timex.h
--- linux-2.6.9-100.orig/include/asm-ia64/timex.h	2004-10-19 01:55:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-ia64/timex.h	2011-06-15 19:26:19.000000000 +0400
@@ -10,11 +10,14 @@
  *			Also removed cacheflush_time as it's entirely unused.
  */
 
-#include <asm/intrinsics.h>
-#include <asm/processor.h>
+extern unsigned int cpu_khz;
 
 typedef unsigned long cycles_t;
 
+#ifdef __KERNEL__
+#include <asm/intrinsics.h>
+#include <asm/processor.h>
+
 /*
  * For performance reasons, we don't want to define CLOCK_TICK_TRATE as
  * local_cpu_data->itc_rate.  Fortunately, we don't have to, either: according to George
@@ -37,4 +40,5 @@ get_cycles (void)
 	return ret;
 }
 
+#endif /* __KERNEL__ */
 #endif /* _ASM_IA64_TIMEX_H */
diff -Nurap linux-2.6.9-100.orig/include/asm-ia64/unistd.h linux-2.6.9-ve023stab054/include/asm-ia64/unistd.h
--- linux-2.6.9-100.orig/include/asm-ia64/unistd.h	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-ia64/unistd.h	2011-06-15 19:26:22.000000000 +0400
@@ -266,11 +266,24 @@
 
 /* 1274 -- 1303 reserved*/
 #define __NR_getcpu			1304
+#define __NR_fairsched_vcpus		1499
+#define __NR_fairsched_mknod		1500
+#define __NR_fairsched_rmnod		1501
+#define __NR_fairsched_chwt		1502
+#define __NR_fairsched_mvpr		1503
+#define __NR_fairsched_rate		1504
+#define __NR_getluid			1505
+#define __NR_setluid			1506
+#define __NR_setublimit			1507
+#define __NR_ubstat			1508
+#define __NR_lchmod			1509
+#define __NR_lutime			1510
+
 #ifdef __KERNEL__
 
 #include <linux/config.h>
 
-#define NR_syscalls			281 /* length of syscall table */
+#define NR_syscalls	(__NR_lutime - __NR_ni_syscall + 1) /* length of syscall table */
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
diff -Nurap linux-2.6.9-100.orig/include/asm-m32r/thread_info.h linux-2.6.9-ve023stab054/include/asm-m32r/thread_info.h
--- linux-2.6.9-100.orig/include/asm-m32r/thread_info.h	2004-10-19 01:55:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-m32r/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -123,6 +123,7 @@ static inline struct thread_info *curren
 #define TIF_SINGLESTEP		4	/* restore singlestep on return to user mode */
 #define TIF_IRET		5	/* return with iret */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
diff -Nurap linux-2.6.9-100.orig/include/asm-m68k/thread_info.h linux-2.6.9-ve023stab054/include/asm-m68k/thread_info.h
--- linux-2.6.9-100.orig/include/asm-m68k/thread_info.h	2004-10-19 01:54:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-m68k/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -48,6 +48,7 @@ struct thread_info {
 #define TIF_NOTIFY_RESUME	2	/* resumption notification requested */
 #define TIF_SIGPENDING		3	/* signal pending */
 #define TIF_NEED_RESCHED	4	/* rescheduling necessary */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 extern int thread_flag_fixme(void);
 
diff -Nurap linux-2.6.9-100.orig/include/asm-m68knommu/thread_info.h linux-2.6.9-ve023stab054/include/asm-m68knommu/thread_info.h
--- linux-2.6.9-100.orig/include/asm-m68knommu/thread_info.h	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-m68knommu/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -91,6 +91,7 @@ static inline struct thread_info *curren
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_POLLING_NRFLAG	4	/* true if poll_idle() is polling
 					   TIF_NEED_RESCHED */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
diff -Nurap linux-2.6.9-100.orig/include/asm-mips/system.h linux-2.6.9-ve023stab054/include/asm-mips/system.h
--- linux-2.6.9-100.orig/include/asm-mips/system.h	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-mips/system.h	2011-06-15 19:26:19.000000000 +0400
@@ -496,7 +496,7 @@ do {						\
 	spin_lock(&(next)->switch_lock);	\
 	spin_unlock(&(rq)->lock);		\
 } while (0)
-#define finish_arch_switch(rq, prev)	spin_unlock_irq(&(prev)->switch_lock)
+#define finish_arch_switch(rq, prev)	spin_unlock(&(prev)->switch_lock)
 #define task_running(rq, p) 		((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
 
 #endif /* _ASM_SYSTEM_H */
diff -Nurap linux-2.6.9-100.orig/include/asm-mips/thread_info.h linux-2.6.9-ve023stab054/include/asm-mips/thread_info.h
--- linux-2.6.9-100.orig/include/asm-mips/thread_info.h	2004-10-19 01:53:05.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-mips/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -116,6 +116,7 @@ register struct thread_info *__current_t
 #define TIF_SYSCALL_AUDIT	4	/* syscall auditing active */
 #define TIF_USEDFPU		16	/* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 #define TIF_SYSCALL_TRACE	31	/* syscall trace active */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
diff -Nurap linux-2.6.9-100.orig/include/asm-parisc/thread_info.h linux-2.6.9-ve023stab054/include/asm-parisc/thread_info.h
--- linux-2.6.9-100.orig/include/asm-parisc/thread_info.h	2004-10-19 01:53:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-parisc/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -60,6 +60,7 @@ struct thread_info {
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_POLLING_NRFLAG	4	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_32BIT               5       /* 32 bit binary */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
diff -Nurap linux-2.6.9-100.orig/include/asm-ppc/thread_info.h linux-2.6.9-ve023stab054/include/asm-ppc/thread_info.h
--- linux-2.6.9-100.orig/include/asm-ppc/thread_info.h	2004-10-19 01:53:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-ppc/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -76,6 +76,8 @@ static inline struct thread_info *curren
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_POLLING_NRFLAG	4	/* true if poll_idle() is polling
 					   TIF_NEED_RESCHED */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
+
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
diff -Nurap linux-2.6.9-100.orig/include/asm-ppc64/thread_info.h linux-2.6.9-ve023stab054/include/asm-ppc64/thread_info.h
--- linux-2.6.9-100.orig/include/asm-ppc64/thread_info.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-ppc64/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -97,6 +97,7 @@ static inline struct thread_info *curren
 #define TIF_RUN_LIGHT		6	/* iSeries run light */
 #define TIF_SYSCALL_AUDIT	8	/* syscall auditing active */
 #define TIF_SINGLESTEP		9	/* singlestepping active */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
diff -Nurap linux-2.6.9-100.orig/include/asm-s390/system.h linux-2.6.9-ve023stab054/include/asm-s390/system.h
--- linux-2.6.9-100.orig/include/asm-s390/system.h	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-s390/system.h	2011-06-15 19:26:19.000000000 +0400
@@ -108,7 +108,7 @@ static inline void restore_access_regs(u
 #define task_running(rq, p)		((rq)->curr == (p))
 #define finish_arch_switch(rq, prev) do {				     \
 	set_fs(current->thread.mm_segment);				     \
-	spin_unlock_irq(&(rq)->lock);					     \
+	spin_unlock(&(rq)->lock);					     \
 } while (0)
 
 #define nop() __asm__ __volatile__ ("nop")
diff -Nurap linux-2.6.9-100.orig/include/asm-s390/thread_info.h linux-2.6.9-ve023stab054/include/asm-s390/thread_info.h
--- linux-2.6.9-100.orig/include/asm-s390/thread_info.h	2004-10-19 01:54:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-s390/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -100,6 +100,7 @@ static inline struct thread_info *curren
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling 
 					   TIF_NEED_RESCHED */
 #define TIF_31BIT		18	/* 32bit process */ 
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
diff -Nurap linux-2.6.9-100.orig/include/asm-sh/thread_info.h linux-2.6.9-ve023stab054/include/asm-sh/thread_info.h
--- linux-2.6.9-100.orig/include/asm-sh/thread_info.h	2004-10-19 01:54:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-sh/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -93,6 +93,7 @@ static inline struct thread_info *curren
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_USEDFPU		16	/* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 #define TIF_USERSPACE		31	/* true if FS sets userspace */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
diff -Nurap linux-2.6.9-100.orig/include/asm-sh64/thread_info.h linux-2.6.9-ve023stab054/include/asm-sh64/thread_info.h
--- linux-2.6.9-100.orig/include/asm-sh64/thread_info.h	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-sh64/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -74,6 +74,7 @@ static inline struct thread_info *curren
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define THREAD_SIZE	16384
 
diff -Nurap linux-2.6.9-100.orig/include/asm-sparc/system.h linux-2.6.9-ve023stab054/include/asm-sparc/system.h
--- linux-2.6.9-100.orig/include/asm-sparc/system.h	2004-10-19 01:54:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-sparc/system.h	2011-06-15 19:26:19.000000000 +0400
@@ -109,7 +109,7 @@ extern void fpsave(unsigned long *fpregs
 	"save %sp, -0x40, %sp\n\t" \
 	"restore; restore; restore; restore; restore; restore; restore"); \
 } while(0)
-#define finish_arch_switch(rq, next)	spin_unlock_irq(&(rq)->lock)
+#define finish_arch_switch(rq, next)	spin_unlock(&(rq)->lock)
 #define task_running(rq, p)		((rq)->curr == (p))
 
 	/* Much care has gone into this code, do not touch it.
diff -Nurap linux-2.6.9-100.orig/include/asm-sparc/thread_info.h linux-2.6.9-ve023stab054/include/asm-sparc/thread_info.h
--- linux-2.6.9-100.orig/include/asm-sparc/thread_info.h	2004-10-19 01:55:28.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-sparc/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -138,6 +138,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, 
 					 * this quantum (SMP) */
 #define TIF_POLLING_NRFLAG	9	/* true if poll_idle() is polling
 					 * TIF_NEED_RESCHED */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
diff -Nurap linux-2.6.9-100.orig/include/asm-sparc64/system.h linux-2.6.9-ve023stab054/include/asm-sparc64/system.h
--- linux-2.6.9-100.orig/include/asm-sparc64/system.h	2004-10-19 01:53:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-sparc64/system.h	2011-06-15 19:26:19.000000000 +0400
@@ -146,7 +146,7 @@ do {	spin_lock(&(next)->switch_lock);	\
 } while (0)
 
 #define finish_arch_switch(rq, prev)		\
-do {	spin_unlock_irq(&(prev)->switch_lock);	\
+do {	spin_unlock(&(prev)->switch_lock);	\
 } while (0)
 
 #define task_running(rq, p) \
diff -Nurap linux-2.6.9-100.orig/include/asm-sparc64/thread_info.h linux-2.6.9-ve023stab054/include/asm-sparc64/thread_info.h
--- linux-2.6.9-100.orig/include/asm-sparc64/thread_info.h	2004-10-19 01:54:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-sparc64/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -228,6 +228,7 @@ register struct thread_info *current_thr
  *       an immediate value in instructions such as andcc.
  */
 #define TIF_ABI_PENDING		12
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
diff -Nurap linux-2.6.9-100.orig/include/asm-um/thread_info.h linux-2.6.9-ve023stab054/include/asm-um/thread_info.h
--- linux-2.6.9-100.orig/include/asm-um/thread_info.h	2004-10-19 01:53:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-um/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -70,6 +70,7 @@ static inline struct thread_info *curren
 					 * TIF_NEED_RESCHED 
 					 */
 #define TIF_RESTART_BLOCK 	4
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
diff -Nurap linux-2.6.9-100.orig/include/asm-v850/thread_info.h linux-2.6.9-ve023stab054/include/asm-v850/thread_info.h
--- linux-2.6.9-100.orig/include/asm-v850/thread_info.h	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-v850/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -83,6 +83,7 @@ struct thread_info {
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_POLLING_NRFLAG	4	/* true if poll_idle() is polling
 					   TIF_NEED_RESCHED */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/a.out.h linux-2.6.9-ve023stab054/include/asm-x86_64/a.out.h
--- linux-2.6.9-100.orig/include/asm-x86_64/a.out.h	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/a.out.h	2011-06-15 19:26:18.000000000 +0400
@@ -21,7 +21,7 @@ struct exec
 
 #ifdef __KERNEL__
 #include <linux/thread_info.h>
-#define STACK_TOP (test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE)
+#define STACK_TOP TASK_SIZE
 #endif
 
 #endif /* __A_OUT_GNU_H__ */
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/ia32.h linux-2.6.9-ve023stab054/include/asm-x86_64/ia32.h
--- linux-2.6.9-100.orig/include/asm-x86_64/ia32.h	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/ia32.h	2011-06-15 19:26:19.000000000 +0400
@@ -157,7 +157,7 @@ struct ustat32 {
 	char			f_fpack[6];
 };
 
-#define IA32_STACK_TOP IA32_PAGE_OFFSET
+#define IA32_STACK_TOP (IA32_PAGE_OFFSET - PAGE_SIZE * 2)
 
 #ifdef __KERNEL__
 struct user_desc;
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/kdebug.h linux-2.6.9-ve023stab054/include/asm-x86_64/kdebug.h
--- linux-2.6.9-100.orig/include/asm-x86_64/kdebug.h	2011-06-09 19:22:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/kdebug.h	2011-06-15 19:26:22.000000000 +0400
@@ -42,7 +42,7 @@ static inline int notify_die(enum die_va
 	return notifier_call_chain(&die_chain, val, &args); 
 } 
 
-extern int printk_address(unsigned long address);
+extern void printk_address(unsigned long address);
 extern void die(const char *,struct pt_regs *,long);
 extern void __die(const char *,struct pt_regs *,long);
 extern void show_registers(struct pt_regs *regs);
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/mman.h linux-2.6.9-ve023stab054/include/asm-x86_64/mman.h
--- linux-2.6.9-100.orig/include/asm-x86_64/mman.h	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/mman.h	2011-06-15 19:26:19.000000000 +0400
@@ -23,6 +23,7 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO    0x80000         /* map from exec - try not to fail */
 
 #define MS_ASYNC	1		/* sync memory asynchronously */
 #define MS_INVALIDATE	2		/* invalidate the caches */
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/mmu.h linux-2.6.9-ve023stab054/include/asm-x86_64/mmu.h
--- linux-2.6.9-100.orig/include/asm-x86_64/mmu.h	2004-10-19 01:54:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/mmu.h	2011-06-15 19:26:19.000000000 +0400
@@ -15,6 +15,7 @@ typedef struct { 
 	rwlock_t ldtlock; 
 	int size;
 	struct semaphore sem; 
+	void *vdso;
 } mm_context_t;
 
 #endif
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/nmi.h linux-2.6.9-ve023stab054/include/asm-x86_64/nmi.h
--- linux-2.6.9-100.orig/include/asm-x86_64/nmi.h	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/nmi.h	2011-06-15 19:26:22.000000000 +0400
@@ -25,6 +25,9 @@ void set_nmi_callback(nmi_callback_t cal
  */
 void unset_nmi_callback(void);
 
+void set_nmi_ipi_callback(nmi_callback_t callback);
+void unset_nmi_ipi_callback(void);
+
 /**
  * do_nmi_callback
  *
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/pgalloc.h linux-2.6.9-ve023stab054/include/asm-x86_64/pgalloc.h
--- linux-2.6.9-100.orig/include/asm-x86_64/pgalloc.h	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/pgalloc.h	2011-06-15 19:26:19.000000000 +0400
@@ -35,12 +35,12 @@ extern __inline__ void pmd_free(pmd_t *p
 
 static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
 {
-	return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pmd_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT);
 }
 
 static inline pgd_t *pgd_alloc (struct mm_struct *mm)
 {
-	return (pgd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pgd_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT);
 }
 
 static inline void pgd_free (pgd_t *pgd)
@@ -56,7 +56,7 @@ static inline pte_t *pte_alloc_one_kerne
 
 static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	void *p = (void *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT);
 	if (!p)
 		return NULL;
 	return virt_to_page(p);
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/pgtable.h linux-2.6.9-ve023stab054/include/asm-x86_64/pgtable.h
--- linux-2.6.9-100.orig/include/asm-x86_64/pgtable.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/pgtable.h	2011-06-15 19:26:18.000000000 +0400
@@ -446,7 +446,7 @@ extern inline pte_t pte_modify(pte_t pte
 }
 
 #define pte_index(address) \
-		((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+		(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
 			pte_index(address))
 
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/processor.h linux-2.6.9-ve023stab054/include/asm-x86_64/processor.h
--- linux-2.6.9-100.orig/include/asm-x86_64/processor.h	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/processor.h	2011-06-15 19:26:20.000000000 +0400
@@ -170,17 +170,17 @@ static inline void clear_in_cr4 (unsigne
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
-#define TASK_UNMAPPED_32 PAGE_ALIGN(IA32_PAGE_OFFSET/3)
-#define TASK_UNMAPPED_64 PAGE_ALIGN(TASK_SIZE/3) 
-#define TASK_UNMAPPED_BASE	\
-	(test_thread_flag(TIF_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64)  
+#define IA32_PAGE_OFFSET	0xc0000000
+#define TASK_SIZE		(test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64)
+#define TASK_SIZE_OF(child)	((test_tsk_thread_flag(child, TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64))
+
+#define TASK_UNMAPPED_BASE	PAGE_ALIGN(TASK_SIZE/3)
 
 
 /*
  * User space process size: 512GB - 1GB (default).
  */
-#define TASK_SIZE	(0x0000007fc0000000UL)
+#define TASK_SIZE64	(0x0000007fc0000000UL)
 
 #define TASK_SIZE_3264 (test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE)
 
@@ -190,7 +190,6 @@ static inline void clear_in_cr4 (unsigne
 #define IO_BITMAP_BITS  1024
 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
 #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
 #define INVALID_IO_BITMAP_OFFSET 0x8000
 
 struct i387_fxsave_struct {
@@ -239,6 +238,8 @@ DECLARE_PER_CPU(struct tss_struct,init_t
 
 #define ARCH_MIN_TASKALIGN	16
 
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+
 struct thread_struct {
 	unsigned long	rsp0;
 	unsigned long	rsp;
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/segment.h linux-2.6.9-ve023stab054/include/asm-x86_64/segment.h
--- linux-2.6.9-100.orig/include/asm-x86_64/segment.h	2004-10-19 01:55:22.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/segment.h	2011-06-15 19:26:21.000000000 +0400
@@ -3,32 +3,31 @@
 
 #include <asm/cache.h>
 
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
-
-#define __KERNEL32_CS   0x38
-
+#define __KERNEL_COMPAT32_CS   0x8
+#define GDT_ENTRY_BOOT_CS		2
+#define __BOOT_CS	(GDT_ENTRY_BOOT_CS * 8)
+#define GDT_ENTRY_BOOT_DS		3
+#define __BOOT_DS	(GDT_ENTRY_BOOT_DS * 8)
+#define GDT_ENTRY_TSS 4	/* needs two entries */
 /* 
  * we cannot use the same code segment descriptor for user and kernel
  * -- not even in the long flat mode, because of different DPL /kkeil 
  * The segment offset needs to contain a RPL. Grr. -AK
  * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) 
  */
-
-#define __USER32_CS   0x23   /* 4*8+3 */ 
-#define __USER_DS     0x2b   /* 5*8+3 */ 
-#define __USER_CS     0x33   /* 6*8+3 */ 
-#define __USER32_DS	__USER_DS 
+#define GDT_ENTRY_TLS_MIN 6
+#define GDT_ENTRY_TLS_MAX 8
+#define GDT_ENTRY_KERNELCS16 9
 #define __KERNEL16_CS	(GDT_ENTRY_KERNELCS16 * 8)
-#define __KERNEL_COMPAT32_CS   0x8
 
-#define GDT_ENTRY_TLS 1
-#define GDT_ENTRY_TSS 8	/* needs two entries */
 #define GDT_ENTRY_LDT 10
-#define GDT_ENTRY_TLS_MIN 11
-#define GDT_ENTRY_TLS_MAX 13
-/* 14 free */
-#define GDT_ENTRY_KERNELCS16 15
+#define __KERNEL32_CS   0x58	/* 11*8 */
+#define __KERNEL_CS	0x60	/* 12*8 */
+#define __KERNEL_DS	0x68	/* 13*8 */
+#define __USER32_CS   0x73   /* 14*8+3 */ 
+#define __USER_DS     0x7b   /* 15*8+3 */ 
+#define __USER32_DS	__USER_DS 
+#define __USER_CS     0x83   /* 16*8+3 */ 
 
 #define GDT_ENTRY_TLS_ENTRIES 3
 
@@ -40,7 +39,7 @@
 #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
 
 #define IDT_ENTRIES 256
-#define GDT_ENTRIES 16
+#define GDT_ENTRIES 32
 #define GDT_SIZE (GDT_ENTRIES * 8)
 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) 
 
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/thread_info.h linux-2.6.9-ve023stab054/include/asm-x86_64/thread_info.h
--- linux-2.6.9-100.orig/include/asm-x86_64/thread_info.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/thread_info.h	2011-06-15 19:26:22.000000000 +0400
@@ -33,6 +33,7 @@ struct thread_info {
 
 	mm_segment_t		addr_limit;	
 	struct restart_block    restart_block;
+	void			*sysenter_return;
 };
 #endif
 
@@ -105,6 +106,8 @@ static inline struct thread_info *stack_
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_IA32		17	/* 32bit process */ 
 #define TIF_FORK		18	/* ret_from_fork */
+#define TIF_FREEZE		20	/* Freeze request, atomic version of PF_FREEZE */
+#define TIF_MEMDIE		21	/* Process was killed by OOM */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/unistd.h linux-2.6.9-ve023stab054/include/asm-x86_64/unistd.h
--- linux-2.6.9-100.orig/include/asm-x86_64/unistd.h	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/unistd.h	2011-06-15 19:26:22.000000000 +0400
@@ -570,8 +570,33 @@ __SYSCALL(__NR_add_key, sys_add_key)
 __SYSCALL(__NR_request_key, sys_request_key)
 #define __NR_keyctl		250
 __SYSCALL(__NR_keyctl, sys_keyctl)
+#define __NR_fairsched_vcpus	499
+__SYSCALL(__NR_fairsched_vcpus, sys_fairsched_vcpus)
+#define __NR_getluid		500
+__SYSCALL(__NR_getluid, sys_getluid)
+#define __NR_setluid		501
+__SYSCALL(__NR_setluid, sys_setluid)
+#define __NR_setublimit		502
+__SYSCALL(__NR_setublimit, sys_setublimit)
+#define __NR_ubstat		503
+__SYSCALL(__NR_ubstat, sys_ubstat)
+#define __NR_fairsched_mknod	504 /* FairScheduler syscalls */
+__SYSCALL(__NR_fairsched_mknod, sys_fairsched_mknod)
+#define __NR_fairsched_rmnod	505
+__SYSCALL(__NR_fairsched_rmnod, sys_fairsched_rmnod)
+#define __NR_fairsched_chwt	506
+__SYSCALL(__NR_fairsched_chwt, sys_fairsched_chwt)
+#define __NR_fairsched_mvpr	507
+__SYSCALL(__NR_fairsched_mvpr, sys_fairsched_mvpr)
+#define __NR_fairsched_rate	508
+__SYSCALL(__NR_fairsched_rate, sys_fairsched_rate)
+#define __NR_lchmod		509
+__SYSCALL(__NR_lchmod, sys_lchmod)
+#define __NR_lutime		510
+__SYSCALL(__NR_lutime, sys_lutime)
+
+#define __NR_syscall_max __NR_lutime
 
-#define __NR_syscall_max __NR_keyctl
 #ifndef __NO_STUBS
 
 /* user-visible error numbers are in the range -1 - -4095 */
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/vsyscall.h linux-2.6.9-ve023stab054/include/asm-x86_64/vsyscall.h
--- linux-2.6.9-100.orig/include/asm-x86_64/vsyscall.h	2011-06-09 19:22:53.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/vsyscall.h	2011-06-15 19:26:19.000000000 +0400
@@ -1,8 +1,6 @@
 #ifndef _ASM_X86_64_VSYSCALL_H_
 #define _ASM_X86_64_VSYSCALL_H_
 
-#include <linux/seqlock.h>
-
 enum vsyscall_num {
 	__NR_vgettimeofday,
 	__NR_vtime,
@@ -15,13 +13,15 @@ enum vsyscall_num {
 
 #ifdef __KERNEL__
 
+#include <linux/seqlock.h>
+
 #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
 #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
 #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
 #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
 #define __section_sysctl_vsyscall __attribute__ ((unused, __section__ (".sysctl_vsyscall"), aligned(16)))
 #define __section_xtime __attribute__ ((unused, __section__ (".xtime"), aligned(16)))
-#define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(64)))
+#define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(16)))
 
 #define VXTIME_TSC	1
 #define VXTIME_HPET	2
diff -Nurap linux-2.6.9-100.orig/include/asm-x86_64/vsyscall32.h linux-2.6.9-ve023stab054/include/asm-x86_64/vsyscall32.h
--- linux-2.6.9-100.orig/include/asm-x86_64/vsyscall32.h	2004-10-19 01:54:32.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/asm-x86_64/vsyscall32.h	2011-06-15 19:26:19.000000000 +0400
@@ -4,15 +4,21 @@
 /* Values need to match arch/x86_64/ia32/vsyscall.lds */
 
 #ifdef __ASSEMBLY__
-#define VSYSCALL32_BASE 0xffffe000
-#define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x410)
+#define __IA32_PAGE_OFFSET 0xc0000000
+#define VSYSCALL32_BASE (__IA32_PAGE_OFFSET-PAGE_SIZE)
+/* For CPT: VSYSCALL32_SYSEXIT value must match SYSENTER_RETURN_OFFSET
+   value to be able to migrate vsyscall-sysenter page from x86_64 to i386 */
+#define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x420)
 #else
-#define VSYSCALL32_BASE 0xffffe000UL
+#define VSYSCALL32_BASE ((unsigned long)current->mm->context.vdso)
 #define VSYSCALL32_END (VSYSCALL32_BASE + PAGE_SIZE)
 #define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE)
 
+#define __VSYSCALL32_BASE ((unsigned long)(IA32_PAGE_OFFSET-PAGE_SIZE))
+#define __VSYSCALL32_END (__VSYSCALL32_BASE + PAGE_SIZE)
+
 #define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400) 
-#define VSYSCALL32_SYSEXIT ((void *)VSYSCALL32_BASE + 0x410)
+#define VSYSCALL32_SYSEXIT ((void *)VSYSCALL32_BASE + 0x420)
 #define VSYSCALL32_SIGRETURN ((void __user *)VSYSCALL32_BASE + 0x500) 
 #define VSYSCALL32_RTSIGRETURN ((void __user *)VSYSCALL32_BASE + 0x600) 
 #endif
diff -Nurap linux-2.6.9-100.orig/include/linux/binfmts.h linux-2.6.9-ve023stab054/include/linux/binfmts.h
--- linux-2.6.9-100.orig/include/linux/binfmts.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/binfmts.h	2011-06-15 19:26:19.000000000 +0400
@@ -2,6 +2,7 @@
 #define _LINUX_BINFMTS_H
 
 #include <linux/capability.h>
+#include <linux/fs.h>
 
 struct pt_regs;
 
@@ -28,6 +29,7 @@ struct linux_binprm{
 	int sh_bang;
 	struct file * file;
 	int e_uid, e_gid;
+	struct exec_perm perm;
 	kernel_cap_t cap_inheritable, cap_permitted, cap_effective;
 	void *security;
 	int argc, envc;
diff -Nurap linux-2.6.9-100.orig/include/linux/capability.h linux-2.6.9-ve023stab054/include/linux/capability.h
--- linux-2.6.9-100.orig/include/linux/capability.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/capability.h	2011-06-15 19:26:19.000000000 +0400
@@ -143,12 +143,9 @@ typedef __u32 kernel_cap_t;
 
 #define CAP_NET_BROADCAST    11
 
-/* Allow interface configuration */
 /* Allow administration of IP firewall, masquerading and accounting */
 /* Allow setting debug option on sockets */
 /* Allow modification of routing tables */
-/* Allow setting arbitrary process / process group ownership on
-   sockets */
 /* Allow binding to any address for transparent proxying */
 /* Allow setting TOS (type of service) */
 /* Allow setting promiscuous mode */
@@ -179,6 +176,7 @@ typedef __u32 kernel_cap_t;
 #define CAP_SYS_MODULE       16
 
 /* Allow ioperm/iopl access */
+/* Allow O_DIRECT access */
 /* Allow sending USB messages to any device via /proc/bus/usb */
 
 #define CAP_SYS_RAWIO        17
@@ -197,24 +195,19 @@ typedef __u32 kernel_cap_t;
 
 /* Allow configuration of the secure attention key */
 /* Allow administration of the random device */
-/* Allow examination and configuration of disk quotas */
 /* Allow configuring the kernel's syslog (printk behaviour) */
 /* Allow setting the domainname */
 /* Allow setting the hostname */
 /* Allow calling bdflush() */
-/* Allow mount() and umount(), setting up new smb connection */
+/* Allow setting up new smb connection */
 /* Allow some autofs root ioctls */
 /* Allow nfsservctl */
 /* Allow VM86_REQUEST_IRQ */
 /* Allow to read/write pci config on alpha */
 /* Allow irix_prctl on mips (setstacksize) */
 /* Allow flushing all cache on m68k (sys_cacheflush) */
-/* Allow removing semaphores */
-/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
-   and shared memory */
 /* Allow locking/unlocking of shared memory segment */
 /* Allow turning swap on/off */
-/* Allow forged pids on socket credentials passing */
 /* Allow setting readahead and flushing buffers on block devices */
 /* Allow setting geometry in floppy driver */
 /* Allow turning DMA on/off in xd driver */
@@ -231,6 +224,8 @@ typedef __u32 kernel_cap_t;
 /* Allow enabling/disabling tagged queuing on SCSI controllers and sending
    arbitrary SCSI commands */
 /* Allow setting encryption key on loopback filesystem */
+/* Modify data journaling mode on ext3 filesystem (uses journaling
+   resources) */
 
 #define CAP_SYS_ADMIN        21
 
@@ -250,8 +245,6 @@ typedef __u32 kernel_cap_t;
 /* Override resource limits. Set resource limits. */
 /* Override quota limits. */
 /* Override reserved space on ext2 filesystem */
-/* Modify data journaling mode on ext3 filesystem (uses journaling
-   resources) */
 /* NOTE: ext2 honors fsuid when checking for resource overrides, so 
    you can override using fsuid too */
 /* Override size restrictions on IPC message queues */
@@ -284,6 +277,36 @@ typedef __u32 kernel_cap_t;
 
 #define CAP_AUDIT_CONTROL    30
 
+/* Allow access to all information. In the other case some structures will be
+   hiding to ensure different Virtual Environment non-interaction on the same
+   node */
+#define CAP_SETVEID	     29
+
+#define CAP_VE_ADMIN	     30
+
+/* Replacement for CAP_NET_ADMIN:
+   delegated rights to the Virtual environment of its network administration.
+   For now the following rights have been delegated:
+
+   Allow setting arbitrary process / process group ownership on sockets
+   Allow interface configuration
+*/
+#define CAP_VE_NET_ADMIN     CAP_VE_ADMIN
+
+/* Replacement for CAP_SYS_ADMIN:
+   delegated rights to the Virtual environment of its administration.
+   For now the following rights have been delegated:
+*/
+/* Allow mount/umount/remount */
+/* Allow examination and configuration of disk quotas */
+/* Allow removing semaphores */
+/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
+   and shared memory */
+/* Allow locking/unlocking of shared memory segment */
+/* Allow forged pids on socket credentials passing */
+
+#define CAP_VE_SYS_ADMIN     CAP_VE_ADMIN
+
 #ifdef __KERNEL__
 /* 
  * Bounding set
@@ -358,9 +381,16 @@ static inline kernel_cap_t cap_invert(ke
 #define cap_issubset(a,set)  (!(cap_t(a) & ~cap_t(set)))
 
 #define cap_clear(c)         do { cap_t(c) =  0; } while(0)
+
+#ifndef CONFIG_VE
 #define cap_set_full(c)      do { cap_t(c) = ~0; } while(0)
-#define cap_mask(c,mask)     do { cap_t(c) &= cap_t(mask); } while(0)
+#else
+#define cap_set_full(c) \
+        do {cap_t(c) = ve_is_super(get_exec_env()) ? ~0 :		\
+					get_exec_env()->cap_default; } while(0)
+#endif
 
+#define cap_mask(c,mask)     do { cap_t(c) &= cap_t(mask); } while(0)
 #define cap_is_fs_cap(c)     (CAP_TO_MASK(c) & CAP_FS_MASK)
 
 #endif /* __KERNEL__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/coda_linux.h linux-2.6.9-ve023stab054/include/linux/coda_linux.h
--- linux-2.6.9-100.orig/include/linux/coda_linux.h	2004-10-19 01:53:24.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/coda_linux.h	2011-06-15 19:26:19.000000000 +0400
@@ -38,7 +38,8 @@ extern struct file_operations coda_ioctl
 int coda_open(struct inode *i, struct file *f);
 int coda_flush(struct file *f);
 int coda_release(struct inode *i, struct file *f);
-int coda_permission(struct inode *inode, int mask, struct nameidata *nd);
+int coda_permission(struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *exec_perm);
 int coda_revalidate_inode(struct dentry *);
 int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 int coda_setattr(struct dentry *, struct iattr *);
diff -Nurap linux-2.6.9-100.orig/include/linux/console.h linux-2.6.9-ve023stab054/include/linux/console.h
--- linux-2.6.9-100.orig/include/linux/console.h	2011-06-09 19:22:59.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/console.h	2011-06-15 19:26:22.000000000 +0400
@@ -126,4 +126,22 @@ extern int is_console_locked(void);
 #define VESA_HSYNC_SUSPEND      2
 #define VESA_POWERDOWN          3
 
+
+#include <linux/preempt.h>
+#include <linux/cache.h>
+#include <linux/threads.h>
+
+struct printk_aligned {
+	int v;
+} ____cacheline_aligned;
+extern struct printk_aligned printk_no_wake_var[NR_CPUS];
+#define __printk_no_wake (printk_no_wake_var[smp_processor_id()].v)
+#define printk_no_wake ({ \
+			int v; \
+			preempt_disable(); \
+			v = __printk_no_wake; \
+			preempt_enable_no_resched(); \
+			v; \
+			})
+
 #endif /* _LINUX_CONSOLE_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/cpt_image.h linux-2.6.9-ve023stab054/include/linux/cpt_image.h
--- linux-2.6.9-100.orig/include/linux/cpt_image.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/cpt_image.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,1587 @@
+#ifndef __CPT_IMAGE_H_
+#define __CPT_IMAGE_H_ 1
+
+#define CPT_NULL (~0ULL)
+#define CPT_NOINDEX (~0U)
+
+/*
+ * Image file layout.
+ *
+ * - major header
+ * - sections[]
+ *
+ *	Each section is:
+ *	- section header
+ *	- array of objects
+ *
+ * All data records are arch independent, 64 bit aligned.
+ */
+
+enum _cpt_object_type
+{
+	CPT_OBJ_TASK = 0,
+	CPT_OBJ_MM,
+	CPT_OBJ_FS,
+	CPT_OBJ_FILES,
+	CPT_OBJ_FILE,
+	CPT_OBJ_SIGHAND_STRUCT,
+	CPT_OBJ_SIGNAL_STRUCT,
+	CPT_OBJ_TTY,
+	CPT_OBJ_SOCKET,
+	CPT_OBJ_SYSVSEM_UNDO,
+	CPT_OBJ_NAMESPACE,
+	CPT_OBJ_SYSV_SHM,
+	CPT_OBJ_INODE,
+	CPT_OBJ_UBC,
+	CPT_OBJ_SLM_SGREG,
+	CPT_OBJ_SLM_REGOBJ,
+	CPT_OBJ_SLM_MM,
+	CPT_OBJ_MAX,
+	/* The objects above are stored in memory while checkpointing */
+
+	CPT_OBJ_VMA = 1024,
+	CPT_OBJ_FILEDESC,
+	CPT_OBJ_SIGHANDLER,
+	CPT_OBJ_SIGINFO,
+	CPT_OBJ_LASTSIGINFO,
+	CPT_OBJ_SYSV_SEM,
+	CPT_OBJ_SKB,
+	CPT_OBJ_FLOCK,
+	CPT_OBJ_OPENREQ,
+	CPT_OBJ_VFSMOUNT,
+	CPT_OBJ_TRAILER,
+	CPT_OBJ_SYSVSEM_UNDO_REC,
+	CPT_OBJ_NET_DEVICE,
+	CPT_OBJ_NET_IFADDR,
+	CPT_OBJ_NET_ROUTE,
+	CPT_OBJ_NET_CONNTRACK,
+	CPT_OBJ_NET_CONNTRACK_EXPECT,
+	CPT_OBJ_AIO_CONTEXT,
+	CPT_OBJ_VEINFO,
+	CPT_OBJ_EPOLL,
+	CPT_OBJ_EPOLL_FILE,
+	CPT_OBJ_SKFILTER,
+	CPT_OBJ_SIGALTSTACK,
+  	CPT_OBJ_SOCK_MCADDR,
+	CPT_OBJ_BIND_MNT,
+
+	CPT_OBJ_X86_REGS = 4096,
+	CPT_OBJ_X86_64_REGS,
+	CPT_OBJ_PAGES,
+	CPT_OBJ_COPYPAGES,
+	CPT_OBJ_REMAPPAGES,
+	CPT_OBJ_LAZYPAGES,
+	CPT_OBJ_NAME,
+	CPT_OBJ_BITS,
+	CPT_OBJ_REF,
+	CPT_OBJ_ITERPAGES,
+	CPT_OBJ_ITERYOUNGPAGES,
+	CPT_OBJ_VSYSCALL,
+};
+
+#define CPT_ALIGN(n) (((n)+7)&~7)
+
+struct cpt_major_hdr
+{
+	__u8	cpt_signature[4];	/* Magic number */
+	__u16	cpt_hdrlen;		/* Length of this header */
+	__u16	cpt_image_version;	/* Format of this file */
+#define CPT_VERSION_8		0
+#define CPT_VERSION_9		0x100
+#define CPT_VERSION_9_1		0x101
+#define CPT_VERSION_16		0x200
+	__u16	cpt_os_arch;		/* Architecture */
+#define CPT_OS_ARCH_I386	0
+#define CPT_OS_ARCH_EMT64	1
+#define CPT_OS_ARCH_IA64	2
+	__u16	__cpt_pad1;
+	__u32	cpt_ve_features;	/* VE features */
+	__u32	cpt_ve_features2;	/* VE features */
+	__u16	cpt_pagesize;		/* Page size used by OS */
+	__u16	cpt_hz;			/* HZ used by OS */
+	__u64	cpt_start_jiffies64;	/* Jiffies */
+	__u32	cpt_start_sec;		/* Seconds */
+	__u32	cpt_start_nsec;		/* Nanoseconds */
+	__u32	cpt_cpu_caps[4];	/* CPU capabilities */
+	__u32	cpt_kernel_config[4];	/* Kernel config */
+	__u64	cpt_iptables_mask;	/* Used netfilter modules */
+} __attribute__ ((aligned (8)));
+
+#define CPT_SIGNATURE0 0x79
+#define CPT_SIGNATURE1 0x1c
+#define CPT_SIGNATURE2 0x01
+#define CPT_SIGNATURE3 0x63
+
+/* CPU capabilities */
+#define CPT_CPU_X86_CMOV	0
+#define CPT_CPU_X86_FXSR	1
+#define CPT_CPU_X86_SSE		2
+#define CPT_CPU_X86_SSE2	3
+#define CPT_CPU_X86_MMX		4
+#define CPT_CPU_X86_3DNOW	5
+#define CPT_CPU_X86_3DNOW2	6
+#define CPT_CPU_X86_SEP		7
+#define CPT_CPU_X86_EMT64	8
+#define CPT_CPU_X86_IA64	9
+#define CPT_CPU_X86_SYSCALL	10
+#define CPT_CPU_X86_SYSCALL32	11
+#define CPT_CPU_X86_SEP32	12
+
+/* Unsupported features */
+#define CPT_EXTERNAL_PROCESS	16
+#define CPT_NAMESPACES		17
+#define CPT_SCHEDULER_POLICY	18
+#define CPT_PTRACED_FROM_VE0	19
+#define CPT_UNSUPPORTED_FSTYPE	20
+#define CPT_BIND_MOUNT		21
+#define CPT_UNSUPPORTED_NETDEV	22
+#define CPT_SLM_DMPRST		24
+
+/* This mask is used to determine whether VE
+   has some unsupported features or not */
+#define CPT_UNSUPPORTED_MASK	0xffff0000UL
+
+#define CPT_KERNEL_CONFIG_PAE	0
+
+struct cpt_section_hdr
+{
+	__u64	cpt_next;
+	__u32	cpt_section;
+	__u16	cpt_hdrlen;
+	__u16	cpt_align;
+} __attribute__ ((aligned (8)));
+
+enum
+{
+	CPT_SECT_ERROR,			/* Error section, content is string */
+	CPT_SECT_VEINFO,
+	CPT_SECT_FILES,			/* Files. Content is array of file objects */
+	CPT_SECT_TASKS,
+	CPT_SECT_MM,
+	CPT_SECT_FILES_STRUCT,
+	CPT_SECT_FS,
+	CPT_SECT_SIGHAND_STRUCT,
+	CPT_SECT_TTY,
+	CPT_SECT_SOCKET,
+	CPT_SECT_NAMESPACE,
+	CPT_SECT_SYSVSEM_UNDO,
+	CPT_SECT_INODE,			/* Inodes with i->i_nlink==0 and
+					 * deleted dentires with inodes not
+					 * referenced inside dumped process.
+					 */
+	CPT_SECT_SYSV_SHM,
+	CPT_SECT_SYSV_SEM,
+	CPT_SECT_ORPHANS,
+	CPT_SECT_NET_DEVICE,
+	CPT_SECT_NET_IFADDR,
+	CPT_SECT_NET_ROUTE,
+	CPT_SECT_NET_IPTABLES,
+	CPT_SECT_NET_CONNTRACK,
+	CPT_SECT_NET_CONNTRACK_VE0,
+	CPT_SECT_UTSNAME,
+	CPT_SECT_TRAILER,
+	CPT_SECT_UBC,
+	CPT_SECT_SLM_SGREGS,
+	CPT_SECT_SLM_REGOBJS,
+/* Due to silly mistake we cannot index sections beyond this value */
+#define	CPT_SECT_MAX_INDEX	(CPT_SECT_SLM_REGOBJS+1)
+	CPT_SECT_EPOLL,
+	CPT_SECT_VSYSCALL,
+	CPT_SECT_INOTIFY,
+	CPT_SECT_SYSV_MSG,
+	CPT_SECT_MAX
+};
+
+struct cpt_major_tail
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_lazypages;
+	__u32	cpt_64bit;
+	__u64	cpt_sections[CPT_SECT_MAX_INDEX];
+	__u32	cpt_nsect;
+	__u8	cpt_signature[4];	/* Magic number */
+} __attribute__ ((aligned (8)));
+
+
+/* Common object header. */
+struct cpt_object_hdr
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+} __attribute__ ((aligned (8)));
+
+enum _cpt_content_type {
+	CPT_CONTENT_VOID,
+	CPT_CONTENT_ARRAY,
+	CPT_CONTENT_DATA,
+	CPT_CONTENT_NAME,
+
+	CPT_CONTENT_STACK,
+	CPT_CONTENT_X86_FPUSTATE_OLD,
+	CPT_CONTENT_X86_FPUSTATE,
+	CPT_CONTENT_MM_CONTEXT,
+	CPT_CONTENT_SEMARRAY,
+	CPT_CONTENT_SEMUNDO,
+	CPT_CONTENT_NLMARRAY,
+	CPT_CONTENT_MAX
+};
+
+/* CPT_OBJ_BITS: encode array of bytes */ 
+struct cpt_obj_bits
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_size;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_REF: a reference to another object */ 
+struct cpt_obj_ref
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_pos;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_VEINFO: various ve specific data */
+struct cpt_veinfo_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	/* ipc ctls */
+	__u32	shm_ctl_max;
+	__u32	shm_ctl_all;
+	__u32	shm_ctl_mni;
+	__u32	msg_ctl_max;
+	__u32	msg_ctl_mni;
+	__u32	msg_ctl_mnb;
+	__u32	sem_ctl_arr[4];
+
+	/* start time */
+	__u64	start_timespec_delta;
+	__u64	start_jiffies_delta;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_FILE: one struct file */ 
+struct cpt_file_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_flags;
+	__u32	cpt_mode;
+	__u64	cpt_pos;
+	__u32	cpt_uid;
+	__u32	cpt_gid;
+
+	__u32	cpt_i_mode;
+	__u32	cpt_lflags;
+#define CPT_DENTRY_DELETED	1
+#define CPT_DENTRY_ROOT		2
+#define CPT_DENTRY_CLONING	4
+#define CPT_DENTRY_PROC		8
+#define CPT_DENTRY_EPOLL	0x10
+#define CPT_DENTRY_REPLACED	0x20
+	__u64	cpt_inode;
+	__u64	cpt_priv;
+
+	__u32	cpt_fown_fd;
+	__u32	cpt_fown_pid;
+	__u32	cpt_fown_uid;
+	__u32	cpt_fown_euid;
+	__u32	cpt_fown_signo;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by file name, encoded as CPT_OBJ_NAME */
+
+struct cpt_epoll_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+} __attribute__ ((aligned (8)));
+/* Followed by array of struct cpt_epoll_file */
+
+struct cpt_epoll_file_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_fd;
+	__u32	cpt_events;
+	__u64	cpt_data;
+	__u32	cpt_revents;
+	__u32	cpt_ready;
+} __attribute__ ((aligned (8)));
+
+
+/* CPT_OBJ_FILEDESC: one file descriptor */
+struct cpt_fd_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_fd;
+	__u32	cpt_flags;
+#define CPT_FD_FLAG_CLOSEEXEC	1
+	__u64	cpt_file;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_FILES: one files_struct */
+struct cpt_files_struct_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_index;
+	__u32	cpt_max_fds;
+	__u32	cpt_next_fd;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by array of cpt_fd_image */
+
+/* CPT_OBJ_FS: one fs_struct */
+struct cpt_fs_struct_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_umask;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by two/three CPT_OBJ_FILENAME for root, pwd and, optionally, altroot */
+
+/* CPT_OBJ_INODE: one struct inode */
+struct cpt_inode_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_dev;
+	__u64	cpt_ino;
+	__u32	cpt_mode;
+	__u32	cpt_nlink;
+	__u32	cpt_uid;
+	__u32	cpt_gid;
+	__u64	cpt_rdev;
+	__u64	cpt_size;
+	__u64	cpt_blksize;
+	__u64	cpt_atime;
+	__u64	cpt_mtime;
+	__u64	cpt_ctime;
+	__u64	cpt_blocks;
+	__u32	cpt_sb;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_VFSMOUNT: one vfsmount */
+struct cpt_vfsmount_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_mntflags;
+#define CPT_MNT_BIND	0x80000000
+#define CPT_MNT_EXT	0x40000000
+	__u32	cpt_flags;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_flock_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_owner;
+	__u32	cpt_pid;
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u32	cpt_flags;
+	__u32	cpt_type;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_tty_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_flags;
+	__u32	cpt_link;
+	__u32	cpt_index;
+	__u32	cpt_drv_type;
+	__u32	cpt_drv_subtype;
+	__u32	cpt_drv_flags;
+	__u8	cpt_packet;
+	__u8	cpt_stopped;
+	__u8	cpt_hw_stopped;
+	__u8	cpt_flow_stopped;
+
+	__u32	cpt_canon_data;
+	__u32	cpt_canon_head;
+	__u32	cpt_canon_column;
+	__u32	cpt_column;
+	__u8	cpt_ctrl_status;
+	__u8	cpt_erasing;
+	__u8	cpt_lnext;
+	__u8	cpt_icanon;
+	__u8	cpt_raw;
+	__u8	cpt_real_raw;
+	__u8	cpt_closing;
+	__u8	__cpt_pad1;
+	__u16	cpt_minimum_to_wake;
+	__u16	__cpt_pad2;
+	__u32	cpt_pgrp;
+	__u32	cpt_session;
+	__u32	cpt_c_line;
+	__u8	cpt_name[64];	
+	__u16	cpt_ws_row;
+	__u16	cpt_ws_col;
+	__u16	cpt_ws_prow;
+	__u16	cpt_ws_pcol;
+	__u8	cpt_c_cc[32];
+	__u32	cpt_c_iflag;
+	__u32	cpt_c_oflag;
+	__u32	cpt_c_cflag;
+	__u32	cpt_c_lflag;
+	__u32	cpt_read_flags[4096/32];
+} __attribute__ ((aligned (8)));
+
+struct cpt_sock_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_parent;
+	__u32	cpt_index;
+
+	__u64	cpt_ssflags;
+	__u16	cpt_type;
+	__u16	cpt_family;
+	__u8	cpt_sstate;
+	__u8	cpt_passcred;
+	__u8	cpt_state;
+	__u8	cpt_reuse;
+
+	__u8	cpt_zapped;
+	__u8	cpt_shutdown;
+	__u8	cpt_userlocks;
+	__u8	cpt_no_check;
+	__u8	cpt_debug;
+	__u8	cpt_rcvtstamp;
+	__u8	cpt_localroute;
+	__u8	cpt_protocol;
+
+	__u32	cpt_err;
+	__u32	cpt_err_soft;
+
+	__u16	cpt_max_ack_backlog;
+	__u16   __cpt_pad1;
+	__u32	cpt_priority;
+
+	__u32	cpt_rcvlowat;
+	__u32	cpt_bound_dev_if;
+
+	__u64	cpt_rcvtimeo;
+	__u64	cpt_sndtimeo;
+	__u32	cpt_rcvbuf;
+	__u32	cpt_sndbuf;
+	__u64	cpt_flags;
+	__u64	cpt_lingertime;
+	__u32	cpt_peer_pid;
+	__u32	cpt_peer_uid;
+
+	__u32	cpt_peer_gid;
+	__u32	cpt_laddrlen;
+	__u32	cpt_laddr[128/4];
+	__u32	cpt_raddrlen;
+	__u32	cpt_raddr[128/4];
+	/* AF_UNIX */
+	__u32	cpt_peer;
+
+	__u8	cpt_socketpair;
+	__u8	cpt_deleted;
+	__u16	__cpt_pad4;
+	__u32	__cpt_pad5;
+/*
+	struct sk_filter      	*sk_filter;
+ */
+
+	__u64			cpt_stamp;
+	__u32			cpt_daddr;
+	__u16			cpt_dport;
+	__u16			cpt_sport;
+
+	__u32			cpt_saddr;
+	__u32			cpt_rcv_saddr;
+
+	__u32			cpt_uc_ttl;
+	__u32			cpt_tos;
+
+	__u32			cpt_cmsg_flags;
+	__u32			cpt_mc_index;
+
+	__u32			cpt_mc_addr;
+/*
+	struct ip_options	*opt;
+ */
+	__u8			cpt_hdrincl;
+	__u8			cpt_mc_ttl;
+	__u8			cpt_mc_loop;
+	__u8			cpt_pmtudisc;
+
+	__u8			cpt_recverr;
+	__u8			cpt_freebind;
+	__u16			cpt_idcounter;
+	__u32			cpt_cork_flags;
+
+	__u32			cpt_cork_fragsize;
+	__u32			cpt_cork_length;
+	__u32			cpt_cork_addr;
+	__u32			cpt_cork_saddr;
+	__u32			cpt_cork_daddr;
+	__u32			cpt_cork_oif;
+
+	__u32			cpt_udp_pending;
+	__u32			cpt_udp_corkflag;
+	__u16			cpt_udp_encap;
+	__u16			cpt_udp_len;
+	__u32			__cpt_pad7;
+
+	__u64			cpt_saddr6[2];
+	__u64			cpt_rcv_saddr6[2];
+	__u64			cpt_daddr6[2];
+	__u32			cpt_flow_label6;
+	__u32			cpt_frag_size6;
+	__u32			cpt_hop_limit6;
+	__u32			cpt_mcast_hops6;
+
+	__u32			cpt_mcast_oif6;
+	__u8			cpt_rxopt6;
+	__u8			cpt_mc_loop6;
+	__u8			cpt_recverr6;
+	__u8			cpt_sndflow6;
+
+	__u8			cpt_pmtudisc6;
+	__u8			cpt_ipv6only6;
+	__u8			cpt_mapped;
+	__u8			__cpt_pad8;
+	__u32	cpt_pred_flags;
+
+	__u32	cpt_rcv_nxt;
+	__u32	cpt_snd_nxt;
+
+	__u32	cpt_snd_una;
+	__u32	cpt_snd_sml;
+
+	__u32	cpt_rcv_tstamp;
+	__u32	cpt_lsndtime;
+
+	__u8	cpt_tcp_header_len;
+	__u8	cpt_ack_pending;
+	__u8	cpt_quick;
+	__u8	cpt_pingpong;
+	__u8	cpt_blocked;
+	__u8	__cpt_pad9;
+	__u16	__cpt_pad10;
+
+	__u32	cpt_ato;
+	__u32	cpt_ack_timeout;
+
+	__u32	cpt_lrcvtime;
+	__u16	cpt_last_seg_size;
+	__u16	cpt_rcv_mss;
+
+	__u32	cpt_snd_wl1;
+	__u32	cpt_snd_wnd;
+
+	__u32	cpt_max_window;
+	__u32	cpt_pmtu_cookie;
+
+	__u32	cpt_mss_cache;
+	__u16	cpt_mss_cache_std;
+	__u16	cpt_mss_clamp;
+
+	__u16	cpt_ext_header_len;
+	__u16	cpt_ext2_header_len;
+	__u8	cpt_ca_state;
+	__u8	cpt_retransmits;
+	__u8	cpt_reordering;
+	__u8	cpt_frto_counter;
+
+	__u32	cpt_frto_highmark;
+	__u8	cpt_adv_cong;
+	__u8	cpt_defer_accept;
+	__u8	cpt_backoff;
+	__u8	__cpt_pad11;
+
+	__u32	cpt_srtt;
+	__u32	cpt_mdev;
+
+	__u32	cpt_mdev_max;
+	__u32	cpt_rttvar;
+
+	__u32	cpt_rtt_seq;
+	__u32	cpt_rto;
+
+	__u32	cpt_packets_out;
+	__u32	cpt_left_out;
+
+	__u32	cpt_retrans_out;
+ 	__u32	cpt_snd_ssthresh;
+
+ 	__u32	cpt_snd_cwnd;
+ 	__u16	cpt_snd_cwnd_cnt;
+	__u16	cpt_snd_cwnd_clamp;
+
+	__u32	cpt_snd_cwnd_used;
+	__u32	cpt_snd_cwnd_stamp;
+
+	__u32	cpt_timeout;
+	__u32	cpt_ka_timeout;
+
+ 	__u32	cpt_rcv_wnd;
+	__u32	cpt_rcv_wup;
+
+	__u32	cpt_write_seq;
+	__u32	cpt_pushed_seq;
+
+	__u32	cpt_copied_seq;
+	__u8	cpt_tstamp_ok;
+	__u8	cpt_wscale_ok;
+	__u8	cpt_sack_ok;
+	__u8	cpt_saw_tstamp;
+
+        __u8	cpt_snd_wscale;
+        __u8	cpt_rcv_wscale;
+	__u8	cpt_nonagle;
+	__u8	cpt_keepalive_probes;
+        __u32	cpt_rcv_tsval;
+
+        __u32	cpt_rcv_tsecr;
+        __u32	cpt_ts_recent;
+
+	__u64	cpt_ts_recent_stamp;
+	__u16	cpt_user_mss;
+	__u8	cpt_dsack;
+	__u8	cpt_eff_sacks;
+	__u32	cpt_sack_array[2*5];
+	__u32	cpt_window_clamp;
+
+	__u32	cpt_rcv_ssthresh;
+	__u8	cpt_probes_out;
+	__u8	cpt_num_sacks;
+	__u16	cpt_advmss;
+
+	__u8	cpt_syn_retries;
+	__u8	cpt_ecn_flags;
+	__u16	cpt_prior_ssthresh;
+	__u32	cpt_lost_out;
+
+	__u32   cpt_sacked_out;
+	__u32   cpt_fackets_out;
+
+	__u32   cpt_high_seq;
+	__u32	cpt_retrans_stamp;
+
+	__u32	cpt_undo_marker;
+	__u32	cpt_undo_retrans;
+
+	__u32	cpt_urg_seq;
+	__u16	cpt_urg_data;
+	__u8	cpt_pending;
+	__u8	cpt_urg_mode;
+
+	__u32	cpt_snd_up;
+	__u32	cpt_keepalive_time;
+
+	__u32   cpt_keepalive_intvl;
+	__u32   cpt_linger2;
+
+	__u32	cpt_rcvrtt_rtt;
+	__u32	cpt_rcvrtt_seq;
+
+	__u32	cpt_rcvrtt_time;
+	__u32	__cpt_pad12;
+} __attribute__ ((aligned (8)));
+
+struct cpt_sockmc_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u16	cpt_family;
+	__u16	cpt_mode;
+	__u32	cpt_ifindex;
+	__u32	cpt_mcaddr[4];
+} __attribute__ ((aligned (8)));
+/* Followed by array of source addresses, each zero padded to 16 bytes */
+
+struct cpt_openreq_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_rcv_isn;
+	__u32	cpt_snt_isn;
+
+	__u16	cpt_rmt_port;
+	__u16	cpt_mss;
+	__u8	cpt_family;
+	__u8	cpt_retrans;
+	__u8	cpt_snd_wscale;
+	__u8	cpt_rcv_wscale;
+
+	__u8	cpt_tstamp_ok;
+	__u8	cpt_sack_ok;
+	__u8	cpt_wscale_ok;
+	__u8	cpt_ecn_ok;
+	__u8	cpt_acked;
+	__u8	__cpt_pad1;
+	__u16	__cpt_pad2;
+
+	__u32	cpt_window_clamp;
+	__u32	cpt_rcv_wnd;
+	__u32	cpt_ts_recent;
+	__u32	cpt_iif;
+	__u64	cpt_expires;
+
+	__u64	cpt_loc_addr[2];
+	__u64	cpt_rmt_addr[2];
+/*
+	struct ip_options	*opt;
+ */
+	
+} __attribute__ ((aligned (8)));
+
+struct cpt_skb_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_owner;
+	__u32	cpt_queue;
+#define CPT_SKB_NQ	0
+#define CPT_SKB_RQ	1
+#define CPT_SKB_WQ	2
+#define CPT_SKB_OFOQ	3
+
+	__u64	cpt_stamp;
+	__u32	cpt_len;
+	__u32	cpt_hspace;
+	__u32	cpt_tspace;
+	__u32	cpt_h;
+	__u32	cpt_nh;
+	__u32	cpt_mac;
+	
+	__u64	cpt_cb[5];
+	__u32	cpt_mac_len;
+	__u32	cpt_csum;
+	__u8	cpt_local_df;
+	__u8	cpt_pkt_type;
+	__u8	cpt_ip_summed;
+	__u8	__cpt_pad1;
+	__u32	cpt_priority;
+	__u16	cpt_protocol;
+	__u16	cpt_security;
+	__u16	cpt_tso_segs;
+	__u16	cpt_tso_size;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_sysvshm_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_key;
+	__u64	cpt_uid;
+	__u64	cpt_gid;
+	__u64	cpt_cuid;
+	__u64	cpt_cgid;
+	__u64	cpt_mode;
+	__u64	cpt_seq;
+
+	__u32	cpt_id;
+	__u32	cpt_mlockuser;
+	__u64	cpt_segsz;
+	__u64	cpt_atime;
+	__u64	cpt_ctime;
+	__u64	cpt_dtime;
+	__u64	cpt_creator;
+	__u64	cpt_last;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_sysvsem_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_key;
+	__u64	cpt_uid;
+	__u64	cpt_gid;
+	__u64	cpt_cuid;
+	__u64	cpt_cgid;
+	__u64	cpt_mode;
+	__u64	cpt_seq;
+	__u32	cpt_id;
+	__u32	__cpt_pad1;
+
+	__u64	cpt_otime;
+	__u64	cpt_ctime;
+} __attribute__ ((aligned (8)));
+/* Content is array of pairs semval/sempid */
+
+struct cpt_sysvsem_undo_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_id;
+	__u32	cpt_nsem;
+} __attribute__ ((aligned (8)));
+
+struct cpt_sysvmsg_msg_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_type;
+	__u64	cpt_size;
+} __attribute__ ((aligned (8)));
+
+struct cpt_sysvmsg_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_key;
+	__u64	cpt_uid;
+	__u64	cpt_gid;
+	__u64	cpt_cuid;
+	__u64	cpt_cgid;
+	__u64	cpt_mode;
+	__u64	cpt_seq;
+	__u32	cpt_id;
+	__u32	__cpt_pad1;
+
+	__u64	cpt_stime;
+	__u64	cpt_rtime;
+	__u64	cpt_ctime;
+	__u64	cpt_last_sender;
+	__u64	cpt_last_receiver;
+	__u64	cpt_qbytes;
+} __attribute__ ((aligned (8)));
+/* Content is array of sysv msg */
+
+struct cpt_mm_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start_code;
+	__u64	cpt_end_code;
+	__u64	cpt_start_data;
+	__u64	cpt_end_data;
+	__u64	cpt_start_brk;
+	__u64	cpt_brk;
+	__u64	cpt_start_stack;
+	__u64	cpt_start_arg;
+	__u64	cpt_end_arg;
+	__u64	cpt_start_env;
+	__u64	cpt_end_env;
+	__u64	cpt_def_flags;
+	__u64	cpt_mmub;
+	__u8	cpt_dumpable;
+	__u8	cpt_vps_dumpable;
+	__u8	cpt_used_hugetlb;
+	__u8	__cpt_pad;
+	__u32   cpt_vdso;
+} __attribute__ ((aligned (8)));
+
+struct cpt_page_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+} __attribute__ ((aligned (8)));
+
+struct cpt_remappage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_pgoff;
+} __attribute__ ((aligned (8)));
+
+struct cpt_copypage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_source;
+} __attribute__ ((aligned (8)));
+
+struct cpt_lazypage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_index;
+} __attribute__ ((aligned (8)));
+
+struct cpt_iterpage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+} __attribute__ ((aligned (8)));
+/* Followed by array of PFNs */
+
+struct cpt_vma_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_type;
+#define CPT_VMA_TYPE_0		0
+#define CPT_VMA_TYPE_SHM	1
+#define CPT_VMA_VDSO		2
+	__u32	cpt_anonvma;
+	__u64	cpt_anonvmaid;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_flags;
+	__u64	cpt_pgprot;
+	__u64	cpt_pgoff;
+} __attribute__ ((aligned (8)));
+
+struct cpt_aio_ctx_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_max_reqs;
+	__u32	cpt_ring_pages;
+	__u32	cpt_tail;
+	__u32	cpt_nr;
+	__u64	cpt_mmap_base;
+	/* Data (io_event's) and struct aio_ring are stored in user space VM */
+} __attribute__ ((aligned (8)));
+
+
+/* Format of MM section.
+ *
+ * It is array of MM objects (mm_struct). Each MM object is
+ * header, encoding mm_struct, followed by array of VMA objects.
+ * Each VMA consists of VMA header, encoding vm_area_struct, and
+ * if the VMA contains copied pages, the header is followed by
+ * array of tuples start-end each followed by data.
+ *
+ * ATTN: no block/page alignment. Only 64bit alignment. This might not be good.
+ */
+
+struct cpt_restart_block {
+	__u64	fn;
+#define CPT_RBL_0			0
+#define CPT_RBL_NANOSLEEP		1
+#define CPT_RBL_COMPAT_NANOSLEEP	2
+	__u64	arg0;
+	__u64	arg1;
+	__u64	arg2;
+	__u64	arg3;
+} __attribute__ ((aligned (8)));
+
+struct cpt_siginfo_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_qflags;
+	__u32	cpt_signo;
+	__u32	cpt_errno;
+	__u32	cpt_code;
+
+	__u64	cpt_sigval;
+	__u32	cpt_pid;
+	__u32	cpt_uid;
+	__u64	cpt_utime;
+	__u64	cpt_stime;
+
+	__u64	cpt_user;
+} __attribute__ ((aligned (8)));
+
+/* Portable presentations for segment registers */
+
+#define CPT_SEG_ZERO		0
+#define CPT_SEG_TLS1		1
+#define CPT_SEG_TLS2		2
+#define CPT_SEG_TLS3		3
+#define CPT_SEG_USER32_DS	4
+#define CPT_SEG_USER32_CS	5
+#define CPT_SEG_USER64_DS	6
+#define CPT_SEG_USER64_CS	7
+#define CPT_SEG_LDT		256
+
+struct cpt_x86_regs
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_debugreg[8];
+	__u32	cpt_fs;
+	__u32	cpt_gs;
+
+	__u32	cpt_ebx;
+	__u32	cpt_ecx;
+	__u32	cpt_edx;
+	__u32	cpt_esi;
+	__u32	cpt_edi;
+	__u32	cpt_ebp;
+	__u32	cpt_eax;
+	__u32	cpt_xds;
+	__u32	cpt_xes;
+	__u32	cpt_orig_eax;
+	__u32	cpt_eip;
+	__u32	cpt_xcs;
+	__u32	cpt_eflags;
+	__u32	cpt_esp;
+	__u32	cpt_xss;
+	__u32	cpt_pad;
+};
+
+struct cpt_x86_64_regs
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_debugreg[8];
+
+	__u64	cpt_fsbase;
+	__u64	cpt_gsbase;
+	__u32	cpt_fsindex;
+	__u32	cpt_gsindex;
+	__u32	cpt_ds;
+	__u32	cpt_es;
+
+	__u64	cpt_r15;
+	__u64	cpt_r14;
+	__u64	cpt_r13;
+	__u64	cpt_r12;
+	__u64	cpt_rbp;
+	__u64	cpt_rbx;
+	__u64	cpt_r11;
+	__u64	cpt_r10;	
+	__u64	cpt_r9;
+	__u64	cpt_r8;
+	__u64	cpt_rax;
+	__u64	cpt_rcx;
+	__u64	cpt_rdx;
+	__u64	cpt_rsi;
+	__u64	cpt_rdi;
+	__u64	cpt_orig_rax;
+	__u64	cpt_rip;
+	__u64	cpt_cs;
+	__u64	cpt_eflags;
+	__u64	cpt_rsp;
+	__u64	cpt_ss;
+};
+
+struct cpt_task_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_state;
+	__u64	cpt_flags;
+	__u64	cpt_ptrace;
+	__u32	cpt_prio;
+	__u32	cpt_static_prio;
+	__u32	cpt_policy;
+	__u32	cpt_rt_priority;
+
+	/* struct thread_info */
+	__u64	cpt_exec_domain;
+	__u64	cpt_thrflags;
+	__u64	cpt_thrstatus;
+	__u64	cpt_addr_limit;
+
+	__u64	cpt_personality;
+
+	__u64	cpt_mm;
+	__u64	cpt_files;
+	__u64	cpt_fs;
+	__u64	cpt_signal;
+	__u64	cpt_sighand;
+	__u64	cpt_sigblocked;
+	__u64	cpt_sigrblocked;
+	__u64	cpt_sigpending;
+	__u64	cpt_namespace;
+	__u64	cpt_sysvsem_undo;
+	__u32	cpt_pid;
+	__u32	cpt_tgid;
+	__u32	cpt_ppid;
+	__u32	cpt_rppid;
+	__u32	cpt_pgrp;
+	__u32	cpt_session;
+	__u32	cpt_old_pgrp;
+	__u32	__cpt_pad;
+	__u32	cpt_leader;
+	__u8	cpt_pn_state;
+	__u8	cpt_stopped_state;
+	__u8	cpt_sigsuspend_state;
+	__u8	cpt_64bit;
+	__u64	cpt_set_tid;
+	__u64	cpt_clear_tid;
+	__u32	cpt_exit_code;
+	__u32	cpt_exit_signal;
+	__u32	cpt_pdeath_signal;
+	__u32	cpt_user;
+	__u32	cpt_uid;
+	__u32	cpt_euid;
+	__u32	cpt_suid;
+	__u32	cpt_fsuid;
+	__u32	cpt_gid;
+	__u32	cpt_egid;
+	__u32	cpt_sgid;
+	__u32	cpt_fsgid;
+	__u32	cpt_ngids;
+	__u32	cpt_gids[32];
+	__u32	__cpt_pad2;
+	__u64	cpt_ecap;
+	__u64	cpt_icap;
+	__u64	cpt_pcap;
+	__u8	cpt_comm[16];
+	__u64	cpt_tls[3];
+	struct cpt_restart_block cpt_restart;
+	__u64	cpt_it_real_value;	/* V8: jiffies, V9..: nsec */
+	__u64	cpt_it_real_incr;	/* V8: jiffies, V9..: nsec */
+	__u64	cpt_it_prof_value;
+	__u64	cpt_it_prof_incr;
+	__u64	cpt_it_virt_value;
+	__u64	cpt_it_virt_incr;
+
+	__u16	cpt_used_math;
+	__u8	cpt_keepcap;
+	__u8	cpt_did_exec;
+	__u32	cpt_ptrace_message;
+
+	__u64	cpt_utime;
+	__u64	cpt_stime;
+	__u64	cpt_starttime;		/* V8: jiffies, V9...: timespec */
+	__u64	cpt_nvcsw;
+	__u64	cpt_nivcsw;
+	__u64	cpt_min_flt;
+	__u64	cpt_maj_flt;
+
+	__u64	cpt_sigsuspend_blocked;
+	__u64	cpt_cutime, cpt_cstime;
+	__u64	cpt_cnvcsw, cpt_cnivcsw;
+	__u64	cpt_cmin_flt, cpt_cmaj_flt;
+
+#define CPT_RLIM_NLIMITS 16
+	__u64	cpt_rlim_cur[CPT_RLIM_NLIMITS];
+	__u64	cpt_rlim_max[CPT_RLIM_NLIMITS];
+
+	__u64	cpt_task_ub;
+	__u64	cpt_exec_ub;
+	__u64	cpt_mm_ub;
+	__u64	cpt_fork_sub;
+} __attribute__ ((aligned (8)));
+
+struct cpt_sigaltstack_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_stack;
+	__u32	cpt_stacksize;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+struct cpt_signal_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_leader;
+	__u8	cpt_pgrp_type;
+	__u8	cpt_old_pgrp_type;
+	__u8	cpt_session_type;
+#define CPT_PGRP_NORMAL		0
+#define CPT_PGRP_ORPHAN		1
+#define CPT_PGRP_STRAY		2
+	__u8	__cpt_pad1;
+	__u64	cpt_pgrp;
+	__u64	cpt_old_pgrp;
+	__u64	cpt_session;
+	__u64	cpt_sigpending;
+	__u64	cpt_ctty;
+
+	__u32	cpt_curr_target;
+	__u32	cpt_group_exit;
+	__u32	cpt_group_exit_code;
+	__u32	cpt_group_exit_task;
+	__u32	cpt_notify_count;
+	__u32	cpt_group_stop_count;
+	__u32	cpt_stop_state;
+	__u32	__cpt_pad2;
+
+	__u64	cpt_utime, cpt_stime, cpt_cutime, cpt_cstime;
+	__u64	cpt_nvcsw, cpt_nivcsw, cpt_cnvcsw, cpt_cnivcsw;
+	__u64	cpt_min_flt, cpt_maj_flt, cpt_cmin_flt, cpt_cmaj_flt;
+
+	__u64	cpt_rlim_cur[CPT_RLIM_NLIMITS];
+	__u64	cpt_rlim_max[CPT_RLIM_NLIMITS];
+} __attribute__ ((aligned (8)));
+/* Followed by list of posix timers. */
+
+struct cpt_sighand_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+} __attribute__ ((aligned (8)));
+/* Followed by list of sighandles. */
+
+struct cpt_sighandler_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+	
+	__u32	cpt_signo;
+	__u32	__cpt_pad1;
+	__u64	cpt_handler;
+	__u64	cpt_restorer;
+	__u64	cpt_flags;
+	__u64	cpt_mask;
+} __attribute__ ((aligned (8)));
+
+struct cpt_netdev_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_index;
+	__u32	cpt_flags;
+	__u8	cpt_name[16];
+} __attribute__ ((aligned (8)));
+
+struct cpt_ifaddr_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_index;
+	__u8	cpt_family;
+	__u8	cpt_masklen;
+	__u8	cpt_flags;
+	__u8	cpt_scope;
+	__u32	cpt_address[4];
+	__u32	cpt_peer[4];
+	__u32	cpt_broadcast[4];
+	__u8	cpt_label[16];
+} __attribute__ ((aligned (8)));
+
+struct cpt_ipct_tuple
+{
+	__u32	cpt_src;
+	__u16	cpt_srcport;
+	__u16	__cpt_pad1;
+
+	__u32	cpt_dst;
+	__u16	cpt_dstport;
+	__u16	cpt_protonum;
+} __attribute__ ((aligned (8)));
+
+struct cpt_nat_manip
+{
+	__u8	cpt_direction;
+	__u8	cpt_hooknum;
+	__u8	cpt_maniptype;
+	__u8	__cpt_pad1;
+
+	__u32	cpt_manip_addr;
+	__u16	cpt_manip_port;
+	__u16	__cpt_pad2;
+	__u32	__cpt_pad3;
+} __attribute__ ((aligned (8)));
+
+struct cpt_nat_seq
+{
+	__u32	cpt_correction_pos;
+	__u32	cpt_offset_before;
+	__u32	cpt_offset_after;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ip_connexpect_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_timeout;
+	__u32	cpt_sibling_conntrack;	/* Index of child conntrack */
+	__u32	cpt_seq;
+
+	struct cpt_ipct_tuple	cpt_ct_tuple;
+	struct cpt_ipct_tuple	cpt_tuple;
+	struct cpt_ipct_tuple	cpt_mask;
+
+	/* union ip_conntrack_expect_help. Used by ftp, irc, amanda */
+	__u32	cpt_help[3];
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ip_conntrack_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	struct cpt_ipct_tuple cpt_tuple[2];
+	__u64	cpt_status;
+	__u64	cpt_timeout;
+	__u32	cpt_index;
+	__u8	cpt_ct_helper;
+	__u8	cpt_nat_helper;
+	__u16	__cpt_pad1;
+
+	/* union ip_conntrack_proto. Used by tcp and icmp. */
+	__u32	cpt_proto_data[12];
+
+	/* union ip_conntrack_help. Used only by ftp helper. */
+	__u32	cpt_help_data[4];
+
+	/* nat info */
+	__u32	cpt_initialized;
+	__u32	cpt_num_manips;
+	struct  cpt_nat_manip	cpt_nat_manips[6];
+
+	struct	cpt_nat_seq	cpt_nat_seq[2];
+
+	__u32	cpt_masq_index;
+	__u32	__cpt_pad2;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ip_conntrack_image_0
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	struct cpt_ipct_tuple cpt_tuple[2];
+	__u64	cpt_status;
+	__u64	cpt_timeout;
+	__u32	cpt_index;
+	__u8	cpt_ct_helper;
+	__u8	cpt_nat_helper;
+	__u16	__cpt_pad1;
+
+	/* union ip_conntrack_proto. Used by tcp and icmp. */
+	__u32	cpt_proto_data[2];
+
+	/* union ip_conntrack_help. Used only by ftp helper. */
+	__u32	cpt_help_data[4];
+
+	/* nat info */
+	__u32	cpt_initialized;
+	__u32	cpt_num_manips;
+	struct  cpt_nat_manip	cpt_nat_manips[6];
+
+	struct	cpt_nat_seq	cpt_nat_seq[2];
+
+	__u32	cpt_masq_index;
+	__u32	__cpt_pad2;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ubparm
+{
+	__u64	barrier;
+	__u64	limit;
+	__u64	held;
+	__u64	maxheld;
+	__u64	minheld;
+	__u64	failcnt;
+} __attribute__ ((aligned (8)));
+
+struct cpt_beancounter_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_parent;
+	__u32	cpt_id;
+	__u32	__cpt_pad;
+	struct	cpt_ubparm	cpt_parms[32 * 2];
+} __attribute__ ((aligned (8)));
+
+struct cpt_slm_sgreg_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_size;
+	__u32	__cpt_pad1;
+	__u32	cpt_id;
+	__u16	cpt_resource;
+	__u8	cpt_regname[32];
+	__u8	__cpt_pad2[2];
+} __attribute__ ((aligned (8)));
+
+struct cpt_slm_obj_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_size;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+
+#ifdef __KERNEL__
+#include <linux/signal.h>
+#include <linux/time.h>
+
+static inline void *cpt_ptr_import(__u64 ptr)
+{
+	return (void*)(unsigned long)ptr;
+}
+
+static inline __u64 cpt_ptr_export(void __user *ptr)
+{
+	return (__u64)(unsigned long)ptr;
+}
+
+static inline void cpt_sigset_import(sigset_t *sig, __u64 ptr)
+{
+	memcpy(sig, &ptr, sizeof(*sig));
+}
+
+static inline __u64 cpt_sigset_export(sigset_t *sig)
+{
+	return *(__u64*)sig;
+}
+
+static inline __u64 cpt_timespec_export(struct timespec *tv)
+{
+	return (((u64)tv->tv_sec) << 32) + tv->tv_nsec;
+}
+
+static inline void cpt_timespec_import(struct timespec *tv, __u64 val)
+{
+	tv->tv_sec = val>>32;
+	tv->tv_nsec = (val&0xFFFFFFFF);
+}
+
+static inline __u64 cpt_timeval_export(struct timeval *tv)
+{
+	return (((u64)tv->tv_sec) << 32) + tv->tv_usec;
+}
+
+static inline void cpt_timeval_import(struct timeval *tv, __u64 val)
+{
+	tv->tv_sec = val>>32;
+	tv->tv_usec = (val&0xFFFFFFFF);
+}
+
+#endif
+
+#endif /* __CPT_IMAGE_H_ */
diff -Nurap linux-2.6.9-100.orig/include/linux/cpt_ioctl.h linux-2.6.9-ve023stab054/include/linux/cpt_ioctl.h
--- linux-2.6.9-100.orig/include/linux/cpt_ioctl.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/cpt_ioctl.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,32 @@
+#ifndef _CPT_IOCTL_H_
+#define _CPT_IOCTL_H_ 1
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define CPTCTLTYPE '-'
+#define CPT_SET_DUMPFD	_IOW(CPTCTLTYPE, 1, int)
+#define CPT_SET_STATUSFD _IOW(CPTCTLTYPE, 2, int)
+#define CPT_SET_LOCKFD	_IOW(CPTCTLTYPE, 3, int)
+#define CPT_SET_VEID	_IOW(CPTCTLTYPE, 4, int)
+#define CPT_SUSPEND	_IO(CPTCTLTYPE, 5)
+#define CPT_DUMP	_IO(CPTCTLTYPE, 6)
+#define CPT_UNDUMP	_IO(CPTCTLTYPE, 7)
+#define CPT_RESUME	_IO(CPTCTLTYPE, 8)
+#define CPT_KILL	_IO(CPTCTLTYPE, 9)
+#define CPT_JOIN_CONTEXT _IO(CPTCTLTYPE, 10)
+#define CPT_GET_CONTEXT _IOW(CPTCTLTYPE, 11, unsigned int)
+#define CPT_PUT_CONTEXT _IO(CPTCTLTYPE, 12)
+#define CPT_SET_PAGEINFDIN _IOW(CPTCTLTYPE, 13, int)
+#define CPT_SET_PAGEINFDOUT _IOW(CPTCTLTYPE, 14, int)
+#define CPT_PAGEIND	_IO(CPTCTLTYPE, 15)
+#define CPT_VMPREP	_IOW(CPTCTLTYPE, 16, int)
+#define CPT_SET_LAZY	_IOW(CPTCTLTYPE, 17, int)
+#define CPT_SET_CPU_FLAGS _IOW(CPTCTLTYPE, 18, unsigned int)
+#define CPT_TEST_CAPS	_IOW(CPTCTLTYPE, 19, unsigned int)
+#define CPT_TEST_VECAPS	_IOW(CPTCTLTYPE, 20, unsigned int)
+#define CPT_SET_ERRORFD _IOW(CPTCTLTYPE, 21, int)
+
+#define CPT_ITER	_IOW(CPTCTLTYPE, 23, int)
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/linux/dcache.h linux-2.6.9-ve023stab054/include/linux/dcache.h
--- linux-2.6.9-100.orig/include/linux/dcache.h	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/dcache.h	2011-06-15 19:26:19.000000000 +0400
@@ -80,6 +80,8 @@ struct dcookie_struct;
 
 #define DNAME_INLINE_LEN_MIN 36
 
+#include <ub/ub_dcache.h>
+
 struct dentry {
 	atomic_t d_count;
 	unsigned int d_flags;		/* protected by d_lock */
@@ -95,6 +97,7 @@ struct dentry {
 	struct qstr d_name;
 
 	struct list_head d_lru;		/* LRU list */
+	struct list_head d_sb_lru;	/* per-sb LRU list */
 	struct list_head d_child;	/* child of parent list */
 	struct list_head d_subdirs;	/* our children */
 	struct list_head d_alias;	/* inode alias list */
@@ -107,9 +110,15 @@ struct dentry {
 	struct dcookie_struct *d_cookie; /* cookie, if any */
 	struct hlist_node d_hash;	/* lookup hash list */	
 	int d_mounted;
+	/* It can't be at the end because of DNAME_INLINE_LEN */
+	struct dentry_beancounter dentry_bc;
 	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */
 };
 
+#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
+
+#define dentry_bc(__d) (&(__d)->dentry_bc)
+
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, struct nameidata *);
 	int (*d_hash) (struct dentry *, struct qstr *);
@@ -157,6 +166,9 @@ d_iput:		no		no		no       yes
 
 #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
 #define DCACHE_UNHASHED		0x0010	
+#define DCACHE_VIRTUAL		0x0100	/* ve accessible */
+
+extern void mark_tree_virtual(struct vfsmount *m, struct dentry *d);
 
 extern spinlock_t dcache_lock;
 
@@ -164,17 +176,16 @@ extern spinlock_t dcache_lock;
  * d_drop - drop a dentry
  * @dentry: dentry to drop
  *
- * d_drop() unhashes the entry from the parent
- * dentry hashes, so that it won't be found through
- * a VFS lookup any more. Note that this is different
- * from deleting the dentry - d_delete will try to
- * mark the dentry negative if possible, giving a
- * successful _negative_ lookup, while d_drop will
+ * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
+ * be found through a VFS lookup any more. Note that this is different from
+ * deleting the dentry - d_delete will try to mark the dentry negative if
+ * possible, giving a successful _negative_ lookup, while d_drop will
  * just make the cache lookup fail.
  *
- * d_drop() is used mainly for stuff that wants
- * to invalidate a dentry for some reason (NFS
- * timeouts or autofs deletes).
+ * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
+ * reason (NFS timeouts or autofs deletes).
+ *
+ * __d_drop requires dentry->d_lock.
  */
 
 static inline void __d_drop(struct dentry *dentry)
@@ -188,7 +199,9 @@ static inline void __d_drop(struct dentr
 static inline void d_drop(struct dentry *dentry)
 {
 	spin_lock(&dcache_lock);
+	spin_lock(&dentry->d_lock);
  	__d_drop(dentry);
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 }
 
@@ -261,6 +274,7 @@ char * __d_path( struct dentry *dentry, 
 		 struct dentry *root, struct vfsmount *rootmnt,
 		 char *buffer, int buflen);
 
+extern int d_root_check(struct dentry *, struct vfsmount *);
 extern char * d_path(struct dentry *, struct vfsmount *, char *, int);
  
 /* Allocation counts.. */
@@ -281,6 +295,13 @@ extern char * d_path(struct dentry *, st
 static inline struct dentry *dget(struct dentry *dentry)
 {
 	if (dentry) {
+#ifdef CONFIG_USER_RESOURCE
+		preempt_disable();
+		if (ub_dentry_on &&
+		    atomic_inc_and_test(&dentry_bc(dentry)->d_inuse))
+			BUG();
+		preempt_enable_no_resched();
+#endif
 		BUG_ON(!atomic_read(&dentry->d_count));
 		atomic_inc(&dentry->d_count);
 	}
@@ -323,6 +344,8 @@ extern struct dentry *lookup_create(stru
 
 extern int sysctl_vfs_cache_pressure;
 
+extern int check_area_access_ve(struct dentry *, struct vfsmount *);
+extern int check_area_execute_ve(struct dentry *, struct vfsmount *);
 #endif /* __KERNEL__ */
 
 #endif	/* __LINUX_DCACHE_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/device.h linux-2.6.9-ve023stab054/include/linux/device.h
--- linux-2.6.9-100.orig/include/linux/device.h	2011-06-09 19:22:53.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/device.h	2011-06-15 19:26:22.000000000 +0400
@@ -176,12 +176,19 @@ struct class_attribute class_attr_##_nam
 extern int class_create_file(struct class *, const struct class_attribute *);
 extern void class_remove_file(struct class *, const struct class_attribute *);
 
+struct class_device_attribute {
+	struct attribute	attr;
+	ssize_t (*show)(struct class_device *, char * buf);
+	ssize_t (*store)(struct class_device *, const char * buf, size_t count);
+};
 
 struct class_device {
 	struct list_head	node;
 
 	struct kobject		kobj;
 	struct class		* class;	/* required */
+	dev_t			devt;		/* dev_t, creates the sysfs "dev" */
+	struct class_device_attribute uevent_attr;
 	struct device		* dev;		/* not necessary, but nice to have */
 	void			* class_data;	/* class-specific data */
 
@@ -212,12 +219,6 @@ extern int class_device_rename(struct cl
 extern struct class_device * class_device_get(struct class_device *);
 extern void class_device_put(struct class_device *);
 
-struct class_device_attribute {
-	struct attribute	attr;
-	ssize_t (*show)(struct class_device *, char * buf);
-	ssize_t (*store)(struct class_device *, const char * buf, size_t count);
-};
-
 #define CLASS_DEVICE_ATTR(_name,_mode,_show,_store)		\
 struct class_device_attribute class_device_attr_##_name = 	\
 	__ATTR(_name,_mode,_show,_store)
@@ -226,7 +227,10 @@ extern int class_device_create_file(stru
 				    const struct class_device_attribute *);
 extern void class_device_remove_file(struct class_device *, 
 				     const struct class_device_attribute *);
-
+extern int class_device_create_bin_file(struct class_device *,
+					struct bin_attribute *);
+extern void class_device_remove_bin_file(struct class_device *,
+					 struct bin_attribute *);
 
 struct class_interface {
 	struct list_head	node;
@@ -256,6 +260,18 @@ extern int class_simple_set_hotplug(stru
 	int (*hotplug)(struct class_device *dev, char **envp, int num_envp, char *buffer, int buffer_size));
 extern void class_simple_device_remove(dev_t dev);
 
+/* driverfs interface for exporting device attributes */
+struct device_attribute {
+	struct attribute	attr;
+	ssize_t (*show)(struct device * dev, char * buf);
+	ssize_t (*store)(struct device * dev, const char * buf, size_t count);
+};
+
+#define DEVICE_ATTR(_name,_mode,_show,_store) \
+struct device_attribute dev_attr_##_name = __ATTR(_name,_mode,_show,_store)
+
+extern int device_create_file(struct device *device, struct device_attribute * entry);
+extern void device_remove_file(struct device * dev, struct device_attribute * attr);
 
 struct device {
 	struct list_head node;		/* node in sibling list */
@@ -266,6 +282,7 @@ struct device {
 
 	struct kobject kobj;
 	char	bus_id[BUS_ID_SIZE];	/* position on parent bus */
+	struct device_attribute uevent_attr;
 
 	struct bus_type	* bus;		/* type of bus device is on */
 	struct device_driver *driver;	/* which driver has allocated this
@@ -337,21 +354,6 @@ extern void device_release_driver(struct
 extern void driver_attach(struct device_driver * drv);
 
 
-/* driverfs interface for exporting device attributes */
-
-struct device_attribute {
-	struct attribute	attr;
-	ssize_t (*show)(struct device * dev, char * buf);
-	ssize_t (*store)(struct device * dev, const char * buf, size_t count);
-};
-
-#define DEVICE_ATTR(_name,_mode,_show,_store) \
-struct device_attribute dev_attr_##_name = __ATTR(_name,_mode,_show,_store)
-
-
-extern int device_create_file(struct device *device, struct device_attribute * entry);
-extern void device_remove_file(struct device * dev, struct device_attribute * attr);
-
 /*
  * Platform "fixup" functions - allow the platform to have their say
  * about devices and actions that the general device layer doesn't
diff -Nurap linux-2.6.9-100.orig/include/linux/devpts_fs.h linux-2.6.9-ve023stab054/include/linux/devpts_fs.h
--- linux-2.6.9-100.orig/include/linux/devpts_fs.h	2004-10-19 01:54:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/devpts_fs.h	2011-06-15 19:26:19.000000000 +0400
@@ -21,6 +21,13 @@ int devpts_pty_new(struct tty_struct *tt
 struct tty_struct *devpts_get_tty(int number);	 /* get tty structure */
 void devpts_pty_kill(int number);		 /* unlink */
 
+struct devpts_config {
+	int setuid;
+	int setgid;
+	uid_t   uid;
+	gid_t   gid;
+	umode_t mode;
+};
 #else
 
 /* Dummy stubs in the no-pty case */
diff -Nurap linux-2.6.9-100.orig/include/linux/elfcore.h linux-2.6.9-ve023stab054/include/linux/elfcore.h
--- linux-2.6.9-100.orig/include/linux/elfcore.h	2004-10-19 01:55:29.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/elfcore.h	2011-06-15 19:26:20.000000000 +0400
@@ -6,6 +6,8 @@
 #include <linux/time.h>
 #include <linux/user.h>
 
+extern int sysctl_at_vsyscall;
+
 struct elf_siginfo
 {
 	int	si_signo;			/* signal number */
diff -Nurap linux-2.6.9-100.orig/include/linux/eventpoll.h linux-2.6.9-ve023stab054/include/linux/eventpoll.h
--- linux-2.6.9-100.orig/include/linux/eventpoll.h	2004-10-19 01:54:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/eventpoll.h	2011-06-15 19:26:21.000000000 +0400
@@ -85,6 +85,87 @@ static inline void eventpoll_release(str
 	eventpoll_release_file(file);
 }
 
+struct epoll_filefd {
+	struct file *file;
+	int fd;
+};
+
+/*
+ * This structure is stored inside the "private_data" member of the file
+ * structure and represents the main data structure for the eventpoll
+ * interface.
+ */
+struct eventpoll {
+	/* Protects access to this structure */
+	rwlock_t lock;
+
+	/*
+	 * This semaphore is used to ensure that files are not removed
+	 * while epoll is using them. This is read-held during the event
+	 * collection loop and it is write-held during the file cleanup
+	 * path, the epoll file exit code and the ctl operations.
+	 */
+	struct rw_semaphore sem;
+
+	/* Wait queue used by sys_epoll_wait() */
+	wait_queue_head_t wq;
+
+	/* Wait queue used by file->poll() */
+	wait_queue_head_t poll_wait;
+
+	/* List of ready file descriptors */
+	struct list_head rdllist;
+
+	/* RB-Tree root used to store monitored fd structs */
+	struct rb_root rbr;
+};
+
+/*
+ * Each file descriptor added to the eventpoll interface will
+ * have an entry of this type linked to the hash.
+ */
+struct epitem {
+	/* RB-Tree node used to link this structure to the eventpoll rb-tree */
+	struct rb_node rbn;
+
+	/* List header used to link this structure to the eventpoll ready list */
+	struct list_head rdllink;
+
+	/* The file descriptor information this item refers to */
+	struct epoll_filefd ffd;
+
+	/* Number of active wait queue attached to poll operations */
+	int nwait;
+
+	/* List containing poll wait queues */
+	struct list_head pwqlist;
+
+	/* The "container" of this item */
+	struct eventpoll *ep;
+
+	/* The structure that describes the interested events and the source fd */
+	struct epoll_event event;
+
+	/*
+	 * Used to keep track of the usage count of the structure. This
+	 * prevents the structure from disappearing underneath our processing.
+	 */
+	atomic_t usecnt;
+
+	/* List header used to link this item to the "struct file" items list */
+	struct list_head fllink;
+
+	/* List header used to link the item to the transfer list */
+	struct list_head txlink;
+
+	/*
+	 * This is used during the collection/transfer of events to userspace
+	 * to pin the item's (possibly empty) set of collected events.
+	 */
+	unsigned int revents;
+};
+
+extern struct semaphore epsem;
 
 #else
 
diff -Nurap linux-2.6.9-100.orig/include/linux/fairsched.h linux-2.6.9-ve023stab054/include/linux/fairsched.h
--- linux-2.6.9-100.orig/include/linux/fairsched.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/fairsched.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,121 @@
+#ifndef __LINUX_FAIRSCHED_H__
+#define __LINUX_FAIRSCHED_H__
+
+/*
+ * Fair Scheduler
+ *
+ * Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/cache.h>
+#include <linux/cpumask.h>
+#include <asm/timex.h>
+
+#define FAIRSCHED_HAS_CPU_BINDING	0
+
+typedef struct { cycles_t t; } fschtag_t;
+typedef struct { unsigned long d; } fschdur_t;
+typedef struct { cycles_t v; } fschvalue_t;
+
+struct vcpu_scheduler;
+
+struct fairsched_node {
+	struct list_head runlist;
+
+	/*
+	 * Fair Scheduler fields
+	 *
+	 * nr_running >= nr_ready (!= if delayed)
+	 */
+	fschtag_t start_tag;
+	int nr_ready;
+	int nr_runnable;
+	int nr_pcpu;
+	int vcpus;
+
+	/*
+	 * Rate limiter fields
+	 */
+	cycles_t last_updated_at;
+	fschvalue_t value;	/* leaky function value */
+	cycles_t delay;		/* removed from schedule till */
+	unsigned char delayed;
+
+	/*
+	 * Configuration
+	 *
+	 * Read-only most of the time.
+	 */
+	unsigned weight ____cacheline_aligned_in_smp;
+				/* fairness weight */
+	unsigned char rate_limited;
+	unsigned rate;		/* max CPU share */
+	fschtag_t max_latency;
+	unsigned min_weight;
+
+	struct list_head nodelist;
+	int id;
+#ifdef CONFIG_VE
+	struct ve_struct *owner_env;
+#endif
+	struct vcpu_scheduler *vsched;
+};
+
+#ifdef CONFIG_FAIRSCHED
+
+#define FSCHWEIGHT_MAX			((1 << 16) - 1)
+#define FSCHRATE_SHIFT			10
+
+/*
+ * Fairsched nodes used in boot process.
+ */
+extern struct fairsched_node fairsched_init_node;
+extern struct fairsched_node fairsched_idle_node;
+
+/*
+ * For proc output.
+ */
+extern unsigned fairsched_nr_cpus;
+extern void fairsched_cpu_online_map(int id, cpumask_t *mask);
+
+/* I hope vsched_id is always equal to fairsched node id  --SAW */
+#define task_fairsched_node_id(p)	task_vsched_id(p)
+
+/*
+ * Core functions.
+ */
+extern void fairsched_incrun(struct fairsched_node *node);
+extern void fairsched_decrun(struct fairsched_node *node);
+extern void fairsched_inccpu(struct fairsched_node *node);
+extern void fairsched_deccpu(struct fairsched_node *node);
+extern struct fairsched_node *fairsched_schedule(
+		struct fairsched_node *prev_node,
+		struct fairsched_node *cur_node,
+		int cur_node_active,
+		cycles_t time);
+
+/*
+ * Management functions.
+ */
+void fairsched_init_early(void);
+asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
+		unsigned int newid);
+asmlinkage int sys_fairsched_rmnod(unsigned int id);
+asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid);
+asmlinkage int sys_fairsched_vcpus(unsigned int id, unsigned int vcpus);
+
+#else /* CONFIG_FAIRSCHED */
+
+#define task_fairsched_node_id(p)	0
+#define fairsched_incrun(p)		do { } while (0)
+#define fairsched_decrun(p)		do { } while (0)
+#define fairsched_deccpu(p)		do { } while (0)
+#define fairsched_cpu_online_map(id, mask)	do { *(mask) = cpu_online_map; } while (0)
+
+#endif /* CONFIG_FAIRSCHED */
+
+#endif /* __LINUX_FAIRSCHED_H__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/faudit.h linux-2.6.9-ve023stab054/include/linux/faudit.h
--- linux-2.6.9-100.orig/include/linux/faudit.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/faudit.h	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,59 @@
+/*
+ *  include/linux/faudit.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __FAUDIT_H_
+#define __FAUDIT_H_
+
+#include <linux/config.h>
+#include <linux/virtinfo.h>
+
+struct vfsmount;
+struct dentry;
+struct super_block;
+struct kstatfs;
+struct kstat;
+struct pt_regs;
+
+struct faudit_regs_arg {
+	int err;
+	struct pt_regs *regs;
+};
+
+struct faudit_stat_arg {
+	int err;
+	struct vfsmount *mnt;
+	struct dentry *dentry;
+	struct kstat *stat;
+};
+
+struct faudit_stat64_arg {
+	int err;
+	struct vfsmount *mnt;
+	struct dentry *dentry;
+	struct kstat64 *stat;
+};
+
+struct faudit_statfs_arg {
+	int err;
+	struct super_block *sb;
+	struct kstatfs *stat;
+};
+
+#define VIRTINFO_FAUDIT			(0)
+#define VIRTINFO_FAUDIT_EXIT		(VIRTINFO_FAUDIT + 0)
+#define VIRTINFO_FAUDIT_FORK		(VIRTINFO_FAUDIT + 1)
+#define VIRTINFO_FAUDIT_CLONE		(VIRTINFO_FAUDIT + 2)
+#define VIRTINFO_FAUDIT_VFORK		(VIRTINFO_FAUDIT + 3)
+#define VIRTINFO_FAUDIT_EXECVE		(VIRTINFO_FAUDIT + 4)
+#define VIRTINFO_FAUDIT_STAT		(VIRTINFO_FAUDIT + 5)
+#define VIRTINFO_FAUDIT_STATFS		(VIRTINFO_FAUDIT + 6)
+#define VIRTINFO_FAUDIT_STAT64		(VIRTINFO_FAUDIT + 7)
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/linux/fb.h linux-2.6.9-ve023stab054/include/linux/fb.h
--- linux-2.6.9-100.orig/include/linux/fb.h	2004-10-19 01:53:45.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/fb.h	2011-06-15 19:26:18.000000000 +0400
@@ -748,7 +748,6 @@ extern void fb_destroy_modedb(struct fb_
 
 /* drivers/video/modedb.c */
 #define VESA_MODEDB_SIZE 34
-extern const struct fb_videomode vesa_modes[];
 extern void fb_var_to_videomode(struct fb_videomode *mode,
 				struct fb_var_screeninfo *var);
 extern void fb_videomode_to_var(struct fb_var_screeninfo *var,
@@ -798,6 +797,8 @@ struct fb_modelist {
 	struct fb_videomode mode;
 };
 
+extern const struct fb_videomode vesa_modes[];
+
 extern int fb_find_mode(struct fb_var_screeninfo *var,
 			struct fb_info *info, const char *mode_option,
 			const struct fb_videomode *db,
diff -Nurap linux-2.6.9-100.orig/include/linux/fs.h linux-2.6.9-ve023stab054/include/linux/fs.h
--- linux-2.6.9-100.orig/include/linux/fs.h	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/fs.h	2011-06-15 19:26:20.000000000 +0400
@@ -7,6 +7,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/ve_owner.h>
 #include <linux/linkage.h>
 #include <linux/limits.h>
 #include <linux/wait.h>
@@ -80,6 +81,7 @@ extern int leases_enable, dir_notify_ena
 #define FMODE_LSEEK	4
 #define FMODE_PREAD	8
 #define FMODE_PWRITE	FMODE_PREAD	/* These go hand in hand */
+#define FMODE_QUOTACTL	4
 
 /* File is being opened for execution. Primary users of this flag are
    distributed filesystems that can use it to achieve correct ETXTBUSY
@@ -103,6 +105,7 @@ extern int leases_enable, dir_notify_ena
 /* public flags for file_system_type */
 #define FS_REQUIRES_DEV 1 
 #define FS_BINARY_MOUNTDATA 2
+#define FS_VIRTUALIZED	64	/* Can mount this fstype inside ve */
 #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
 #define FS_ODD_RENAME	32768	/* Temporary stuff; will go away as soon
 				  * as nfs_rename() will be cleaned up
@@ -308,6 +311,9 @@ struct iattr {
  * Includes for diskquotas.
  */
 #include <linux/quota.h>
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+#include <linux/vzquota_qlnk.h>
+#endif
 
 /*
  * oh the beauties of C type declarations.
@@ -435,6 +441,7 @@ static inline int mapping_writably_mappe
 struct inode {
 	struct hlist_node	i_hash;
 	struct list_head	i_list;
+	struct list_head	i_sb_list;
 	struct list_head	i_dentry;
 	unsigned long		i_ino;
 	atomic_t		i_count;
@@ -465,6 +472,9 @@ struct inode {
 #ifdef CONFIG_QUOTA
 	struct dquot		*i_dquot[MAXQUOTAS];
 #endif
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+	struct vz_quota_ilink	i_qlnk;
+#endif
 	/* These three should probably be a union */
 	struct list_head	i_devices;
 	struct pipe_inode_info	*i_pipe;
@@ -553,6 +563,12 @@ static inline unsigned imajor(struct ino
 
 extern struct block_device *I_BDEV(struct inode *inode);
 
+struct exec_perm {
+	umode_t mode;
+	uid_t uid, gid;
+	int set;
+};
+
 struct fown_struct {
 	rwlock_t lock;          /* protects pid, uid, euid fields */
 	int pid;		/* pid or -pgrp where SIGIO should be sent */
@@ -591,6 +607,7 @@ struct file {
 	struct fown_struct	f_owner;
 	unsigned int		f_uid, f_gid;
 	struct file_ra_state	f_ra;
+	struct user_beancounter	*f_ub;
 
 	unsigned long		f_version;
 	void			*f_security;
@@ -604,7 +621,10 @@ struct file {
 	spinlock_t		f_ep_lock;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
+	struct ve_struct	*owner_env;
 };
+DCL_VE_OWNER_PROTO(FILP, GENERIC, struct file, owner_env,
+						inline, (always_inline))
 extern spinlock_t files_lock;
 #define file_list_lock() spin_lock(&files_lock);
 #define file_list_unlock() spin_unlock(&files_lock);
@@ -663,6 +683,7 @@ struct file_lock {
 	struct file *fl_file;
 	unsigned char fl_flags;
 	unsigned char fl_type;
+	unsigned char fl_charged;
 	loff_t fl_start;
 	loff_t fl_end;
 
@@ -779,9 +800,11 @@ struct super_block {
 	void                    *s_security;
 	struct xattr_handler	**s_xattr;
 
+	struct list_head	s_inodes;	/* all inodes */
 	struct list_head	s_dirty;	/* dirty inodes */
 	struct list_head	s_io;		/* parked for writeback */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
+	struct list_head	s_dentry_unused;
 	struct list_head	s_files;
 
 	struct block_device	*s_bdev;
@@ -945,6 +968,7 @@ struct file_operations {
 	int (*readdir) (struct file *, void *, filldir_t);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
+	int (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *);
@@ -985,7 +1009,8 @@ struct inode_operations {
 	int (*follow_link) (struct dentry *, struct nameidata *);
 	void (*put_link) (struct dentry *, struct nameidata *);
 	void (*truncate) (struct inode *);
-	int (*permission) (struct inode *, int, struct nameidata *);
+	int (*permission) (struct inode *, int, struct nameidata *,
+			struct exec_perm *);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -1035,6 +1060,8 @@ struct super_operations {
 	void (*umount_begin) (struct super_block *);
 
 	int (*show_options)(struct seq_file *, struct vfsmount *);
+
+	struct inode *(*get_quota_root)(struct super_block *);
 };
 
 /* Inode state bits.  Protected by inode_lock. */
@@ -1187,8 +1214,15 @@ struct file_system_type {
 	struct module *owner;
 	struct file_system_type * next;
 	struct list_head fs_supers;
+	struct ve_struct *owner_env;
 };
 
+DCL_VE_OWNER_PROTO(FSTYPE, MODULE_NOCHECK, struct file_system_type, owner_env
+						, , ())
+
+void get_filesystem(struct file_system_type *fs);
+void put_filesystem(struct file_system_type *fs);
+
 struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
 	int (*fill_super)(struct super_block *, void *, int));
@@ -1226,8 +1260,11 @@ extern struct vfsmount *kern_mount(struc
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
 extern long do_mount(char *, char *, char *, unsigned long, void *);
+extern void umount_tree(struct vfsmount *);
+#define kern_umount mntput
 
 extern int vfs_statfs(struct super_block *, struct kstatfs *);
+extern int faudit_statfs(struct super_block *, struct kstatfs *);
 
 /* Return value for VFS lock functions - tells locks.c to lock conventionally
  * REALLY kosha for root NFS and nfs_lock
@@ -1346,7 +1383,7 @@ extern void chrdev_show(struct seq_file 
 #define BDEVNAME_SIZE	32	/* Largest string for a blockdev identifier */
 extern const char *__bdevname(dev_t, char *buffer);
 extern const char *bdevname(struct block_device *bdev, char *buffer);
-extern struct block_device *lookup_bdev(const char *);
+extern struct block_device *lookup_bdev(const char *, int mode);
 extern struct block_device *open_bdev_excl(const char *, int, void *);
 extern void close_bdev_excl(struct block_device *);
 extern void blkdev_show(struct seq_file *,off_t);
@@ -1380,7 +1417,7 @@ extern void check_disk_size_change(struc
 				   struct block_device *bdev);
 extern int revalidate_disk(struct gendisk *);
 extern int check_disk_change(struct block_device *);
-extern int invalidate_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *, int);
 extern int __invalidate_device(struct block_device *, int);
 extern int invalidate_partition(struct gendisk *, int);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
@@ -1410,8 +1447,9 @@ extern int do_remount_sb(struct super_bl
 extern sector_t bmap(struct inode *, sector_t);
 extern int setattr_mask(unsigned int);
 extern int notify_change(struct dentry *, struct iattr *);
-extern int permission(struct inode *, int, struct nameidata *);
-extern int vfs_permission(struct inode *, int);
+extern int permission(struct inode *, int, struct nameidata *,
+		struct exec_perm *);
+extern int vfs_permission(struct inode *, int, struct exec_perm *);
 extern int get_write_access(struct inode *);
 extern int deny_write_access(struct file *);
 static inline void put_write_access(struct inode * inode)
@@ -1428,8 +1466,9 @@ extern int do_pipe(int *);
 extern int open_namei(const char *, int, int, struct nameidata *);
 extern int may_open(struct nameidata *, int, int);
 
+struct linux_binprm;
 extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
-extern struct file * open_exec(const char *);
+extern struct file * open_exec(const char *, struct linux_binprm *);
  
 /* fs/dcache.c -- generic fs support functions */
 extern int is_subdir(struct dentry *, struct dentry *);
diff -Nurap linux-2.6.9-100.orig/include/linux/gfp.h linux-2.6.9-ve023stab054/include/linux/gfp.h
--- linux-2.6.9-100.orig/include/linux/gfp.h	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/gfp.h	2011-06-15 19:26:20.000000000 +0400
@@ -38,19 +38,26 @@ struct vm_area_struct;
 #define __GFP_NO_GROW	0x2000	/* Slab internal usage */
 #define __GFP_COMP	0x4000	/* Add compound page metadata */
 
-#define __GFP_BITS_SHIFT 16	/* Room for 16 __GFP_FOO bits */
+#define __GFP_UBC	0x10000	/* charge kmem in buddy and slab */
+#define __GFP_SOFT_UBC	0x20000	/* use soft charging */
+
+#define __GFP_BITS_SHIFT 18	/* Room for 18 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
 
 /* if you forget to add the bitmask here kernel will crash, period */
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
-			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
+			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
+			__GFP_UBC|__GFP_SOFT_UBC) 
 
 #define GFP_ATOMIC	(__GFP_HIGH | __GFP_NOWARN)
+#define GFP_ATOMIC_UBC	(__GFP_HIGH | __GFP_NOWARN | __GFP_UBC)
 #define GFP_NOIO	(__GFP_WAIT)
 #define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
 #define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
+#define GFP_KERNEL_UBC	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_UBC)
 #define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS)
+#define GFP_USER_UBC	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_UBC)
 #define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM)
 
 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
diff -Nurap linux-2.6.9-100.orig/include/linux/inetdevice.h linux-2.6.9-ve023stab054/include/linux/inetdevice.h
--- linux-2.6.9-100.orig/include/linux/inetdevice.h	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/inetdevice.h	2011-06-15 19:26:20.000000000 +0400
@@ -30,6 +30,12 @@ struct ipv4_devconf
 };
 
 extern struct ipv4_devconf ipv4_devconf;
+extern struct ipv4_devconf ipv4_devconf_dflt;
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_ipv4_devconf		(*(get_exec_env()->_ipv4_devconf))
+#else
+#define ve_ipv4_devconf		ipv4_devconf
+#endif
 
 struct in_device
 {
@@ -56,28 +62,28 @@ struct in_device
 };
 
 #define IN_DEV_FORWARD(in_dev)		((in_dev)->cnf.forwarding)
-#define IN_DEV_MFORWARD(in_dev)		(ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
-#define IN_DEV_RPFILTER(in_dev)		(ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
-#define IN_DEV_SOURCE_ROUTE(in_dev)	(ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
-#define IN_DEV_BOOTP_RELAY(in_dev)	(ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
-
-#define IN_DEV_LOG_MARTIANS(in_dev)	(ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
-#define IN_DEV_PROXY_ARP(in_dev)	(ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
-#define IN_DEV_SHARED_MEDIA(in_dev)	(ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
-#define IN_DEV_TX_REDIRECTS(in_dev)	(ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
-#define IN_DEV_SEC_REDIRECTS(in_dev)	(ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
+#define IN_DEV_MFORWARD(in_dev)		(ve_ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
+#define IN_DEV_RPFILTER(in_dev)		(ve_ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
+#define IN_DEV_SOURCE_ROUTE(in_dev)	(ve_ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
+#define IN_DEV_BOOTP_RELAY(in_dev)	(ve_ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
+
+#define IN_DEV_LOG_MARTIANS(in_dev)	(ve_ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
+#define IN_DEV_PROXY_ARP(in_dev)	(ve_ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
+#define IN_DEV_SHARED_MEDIA(in_dev)	(ve_ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
+#define IN_DEV_TX_REDIRECTS(in_dev)	(ve_ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
+#define IN_DEV_SEC_REDIRECTS(in_dev)	(ve_ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
 #define IN_DEV_IDTAG(in_dev)		((in_dev)->cnf.tag)
 #define IN_DEV_MEDIUM_ID(in_dev)	((in_dev)->cnf.medium_id)
 
 #define IN_DEV_RX_REDIRECTS(in_dev) \
 	((IN_DEV_FORWARD(in_dev) && \
-	  (ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
+	  (ve_ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
 	 || (!IN_DEV_FORWARD(in_dev) && \
-	  (ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
+	  (ve_ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
 
-#define IN_DEV_ARPFILTER(in_dev)	(ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
-#define IN_DEV_ARP_ANNOUNCE(in_dev)	(max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
-#define IN_DEV_ARP_IGNORE(in_dev)	(max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
+#define IN_DEV_ARPFILTER(in_dev)	(ve_ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
+#define IN_DEV_ARP_ANNOUNCE(in_dev)	(max(ve_ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
+#define IN_DEV_ARP_IGNORE(in_dev)	(max(ve_ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
 
 struct in_ifaddr
 {
@@ -108,6 +114,7 @@ extern u32		inet_select_addr(const struc
 extern u32		inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope);
 extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask);
 extern void		inet_forward_change(void);
+extern void		inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy);
 
 static __inline__ int inet_ifa_match(u32 addr, struct in_ifaddr *ifa)
 {
@@ -175,6 +182,10 @@ static inline void in_dev_put(struct in_
 #define __in_dev_put(idev)  atomic_dec(&(idev)->refcnt)
 #define in_dev_hold(idev)   atomic_inc(&(idev)->refcnt)
 
+struct ve_struct;
+extern int devinet_sysctl_init(struct ve_struct *);
+extern void devinet_sysctl_fini(struct ve_struct *);
+extern void devinet_sysctl_free(struct ve_struct *);
 #endif /* __KERNEL__ */
 
 static __inline__ __u32 inet_make_mask(int logmask)
diff -Nurap linux-2.6.9-100.orig/include/linux/initrd.h linux-2.6.9-ve023stab054/include/linux/initrd.h
--- linux-2.6.9-100.orig/include/linux/initrd.h	2004-10-19 01:55:28.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/initrd.h	2011-06-15 19:26:19.000000000 +0400
@@ -14,7 +14,7 @@ extern int rd_image_start;
 extern int initrd_below_start_ok;
 
 /* free_initrd_mem always gets called with the next two as arguments.. */
-extern unsigned long initrd_start, initrd_end;
+extern unsigned long initrd_start, initrd_end, initrd_copy;
 extern void free_initrd_mem(unsigned long, unsigned long);
 
 extern unsigned int real_root_dev;
diff -Nurap linux-2.6.9-100.orig/include/linux/jbd.h linux-2.6.9-ve023stab054/include/linux/jbd.h
--- linux-2.6.9-100.orig/include/linux/jbd.h	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/jbd.h	2011-06-15 19:26:19.000000000 +0400
@@ -242,6 +242,21 @@ typedef struct journal_superblock_s
 #include <asm/bug.h>
 
 #define JBD_ASSERTIONS
+#define JBD_SOFT_ASSERTIONS
+#ifdef JBD_SOFT_ASSERTIONS
+#define J_BUG()								\
+do {									\
+	unsigned long stack;						\
+	printk("Stack=%p current=%p pid=%d ve=%d process='%s'\n",	\
+		&stack, current, current->pid,				\
+		get_exec_env()->veid,					\
+		current->comm);						\
+		dump_stack();						\
+} while(0)
+#else
+#define J_BUG()		BUG()
+#endif
+
 #ifdef JBD_ASSERTIONS
 #define J_ASSERT(assert)						\
 do {									\
@@ -249,7 +264,7 @@ do {									\
 		printk (KERN_EMERG					\
 			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
 			__FUNCTION__, __FILE__, __LINE__, # assert);	\
-		BUG();							\
+		J_BUG();						\
 	}								\
 } while (0)
 
@@ -836,6 +851,12 @@ struct journal_s
 	struct jbd_revoke_table_s *j_revoke_table[2];
 
 	/*
+	 * array of bhs for journal_commit_transaction
+	 */
+	struct buffer_head	**j_wbuf;
+	int			j_wbufsize;
+
+	/*
 	 * An opaque pointer to fs-private information.  ext3 puts its
 	 * superblock pointer here
 	 */
diff -Nurap linux-2.6.9-100.orig/include/linux/jiffies.h linux-2.6.9-ve023stab054/include/linux/jiffies.h
--- linux-2.6.9-100.orig/include/linux/jiffies.h	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/jiffies.h	2011-06-15 19:26:19.000000000 +0400
@@ -15,6 +15,7 @@
  */
 extern u64 jiffies_64;
 extern unsigned long volatile jiffies;
+extern unsigned long cycles_per_jiffy, cycles_per_clock;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
diff -Nurap linux-2.6.9-100.orig/include/linux/kdev_t.h linux-2.6.9-ve023stab054/include/linux/kdev_t.h
--- linux-2.6.9-100.orig/include/linux/kdev_t.h	2004-10-19 01:54:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/kdev_t.h	2011-06-15 19:26:19.000000000 +0400
@@ -87,6 +87,57 @@ static inline unsigned sysv_minor(u32 de
 	return dev & 0x3ffff;
 }
 
+#define UNNAMED_MAJOR_COUNT	16
+
+#if UNNAMED_MAJOR_COUNT > 1
+
+extern int unnamed_dev_majors[UNNAMED_MAJOR_COUNT];
+
+static inline dev_t make_unnamed_dev(int idx)
+{
+	/*
+	 * Here we transfer bits from 8 to 8+log2(UNNAMED_MAJOR_COUNT) of the
+	 * unnamed device index into major number.
+	 */
+	return MKDEV(unnamed_dev_majors[(idx >> 8) & (UNNAMED_MAJOR_COUNT - 1)],
+		     idx & ~((UNNAMED_MAJOR_COUNT - 1) << 8));
+}
+
+static inline int unnamed_dev_idx(dev_t dev)
+{
+	int i;
+	for (i = 0; i < UNNAMED_MAJOR_COUNT &&
+				MAJOR(dev) != unnamed_dev_majors[i]; i++);
+	return MINOR(dev) | (i << 8);
+}
+
+static inline int is_unnamed_dev(dev_t dev)
+{
+	int i;
+	for (i = 0; i < UNNAMED_MAJOR_COUNT &&
+				MAJOR(dev) != unnamed_dev_majors[i]; i++);
+	return i < UNNAMED_MAJOR_COUNT;
+}
+
+#else /* UNNAMED_MAJOR_COUNT */
+
+static inline dev_t make_unnamed_dev(int idx)
+{
+	return MKDEV(0, idx);
+}
+
+static inline int unnamed_dev_idx(dev_t dev)
+{
+	return MINOR(dev);
+}
+
+static inline int is_unnamed_dev(dev_t dev)
+{
+	return MAJOR(dev) == 0;
+}
+
+#endif /* UNNAMED_MAJOR_COUNT */
+
 
 #else /* __KERNEL__ */
 
diff -Nurap linux-2.6.9-100.orig/include/linux/kernel.h linux-2.6.9-ve023stab054/include/linux/kernel.h
--- linux-2.6.9-100.orig/include/linux/kernel.h	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/kernel.h	2011-06-15 19:26:22.000000000 +0400
@@ -121,10 +121,18 @@ extern int __kernel_text_address(unsigne
 extern int kernel_text_address(unsigned long addr);
 extern int session_of_pgrp(int pgrp);
 
-asmlinkage int vprintk(const char *fmt, va_list args);
+asmlinkage int vprintk(const char *fmt, va_list args)
+	__attribute__ ((format (printf, 1, 0)));
 asmlinkage int printk(const char * fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
 
+#define VE0_LOG		1
+#define VE_LOG		2
+#define VE_LOG_BOTH	(VE0_LOG | VE_LOG)
+asmlinkage int ve_printk(int, const char * fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+void prepare_printk(void);
+
 extern int dmesg_restrict;
 
 unsigned long int_sqrt(unsigned long);
@@ -147,9 +155,14 @@ static inline int __attribute_pure__ lon
 extern int printk_ratelimit(void);
 extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
 
+extern int console_silence_loglevel;
+
 static inline void console_silent(void)
 {
-	console_loglevel = 0;
+	if (console_loglevel > console_silence_loglevel) {
+		printk("console shuts up ...\n");
+		console_loglevel = 0;
+	}
 }
 
 static inline void console_verbose(void)
@@ -159,12 +172,16 @@ static inline void console_verbose(void)
 }
 
 extern void bust_spinlocks(int yes);
+extern void wake_up_klogd(void);
 extern int oops_in_progress;		/* If set, an oops, panic(), BUG() or die() is in progress */
 extern int panic_on_oops;
 extern int panic_on_unrecovered_nmi;
+extern int decode_call_traces;
 extern int tainted;
+extern int kernel_text_csum_broken;
 extern const char *print_tainted(void);
 extern void add_taint(unsigned);
+extern int alloc_fail_warn;
 
 #define crashdump_mode()       unlikely(netdump_mode || diskdump_mode)
 
diff -Nurap linux-2.6.9-100.orig/include/linux/kmem_cache.h linux-2.6.9-ve023stab054/include/linux/kmem_cache.h
--- linux-2.6.9-100.orig/include/linux/kmem_cache.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/kmem_cache.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,196 @@
+#ifndef __KMEM_CACHE_H__
+#define __KMEM_CACHE_H__
+
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <asm/atomic.h>
+
+/*
+ * SLAB_DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
+ *		  SLAB_RED_ZONE & SLAB_POISON.
+ *		  0 for faster, smaller code (especially in the critical paths).
+ *
+ * SLAB_STATS	- 1 to collect stats for /proc/slabinfo.
+ *		  0 for faster, smaller code (especially in the critical paths).
+ *
+ * SLAB_FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
+ */
+
+#ifdef CONFIG_DEBUG_SLAB
+#define	SLAB_DEBUG		1
+#define	SLAB_STATS		1
+#define	SLAB_FORCED_DEBUG	1
+#else
+#define	SLAB_DEBUG		0
+#define	SLAB_STATS		0
+#define	SLAB_FORCED_DEBUG	0
+#endif
+
+/*
+ * struct array_cache
+ *
+ * Per cpu structures
+ * Purpose:
+ * - LIFO ordering, to hand out cache-warm objects from _alloc
+ * - reduce the number of linked list operations
+ * - reduce spinlock operations
+ *
+ * The limit is stored in the per-cpu structure to reduce the data cache
+ * footprint.
+ *
+ */
+struct array_cache {
+	unsigned int avail;
+	unsigned int limit;
+	unsigned int batchcount;
+	unsigned int touched;
+};
+
+/* bootstrap: The caches do not work without cpuarrays anymore,
+ * but the cpuarrays are allocated from the generic caches...
+ */
+#define BOOT_CPUCACHE_ENTRIES	1
+struct arraycache_init {
+	struct array_cache cache;
+	void * entries[BOOT_CPUCACHE_ENTRIES];
+};
+
+/*
+ * The slab lists of all objects.
+ * Hopefully reduce the internal fragmentation
+ * NUMA: The spinlock could be moved from the kmem_cache_t
+ * into this structure, too. Figure out what causes
+ * fewer cross-node spinlock operations.
+ */
+struct kmem_list3 {
+	struct list_head	slabs_partial;	/* partial list first, better asm code */
+	struct list_head	slabs_full;
+	struct list_head	slabs_free;
+	unsigned long	free_objects;
+	int		free_touched;
+	unsigned long	next_reap;
+	struct array_cache	*shared;
+};
+
+#define LIST3_INIT(parent) \
+	{ \
+		.slabs_full	= LIST_HEAD_INIT(parent.slabs_full), \
+		.slabs_partial	= LIST_HEAD_INIT(parent.slabs_partial), \
+		.slabs_free	= LIST_HEAD_INIT(parent.slabs_free) \
+	}
+#define list3_data(cachep) \
+	(&(cachep)->lists)
+
+/* NUMA: per-node */
+#define list3_data_ptr(cachep, ptr) \
+		list3_data(cachep)
+
+/*
+ * kmem_cache_t
+ *
+ * manages a cache.
+ */
+	
+struct kmem_cache_s {
+/* 1) per-cpu data, touched during every alloc/free */
+	struct array_cache	*array[NR_CPUS];
+	unsigned int		batchcount;
+	unsigned int		limit;
+/* 2) touched by every alloc & free from the backend */
+	struct kmem_list3	lists;
+	/* NUMA: kmem_3list_t	*nodelists[MAX_NUMNODES] */
+	unsigned int		objsize;
+	unsigned int	 	flags;	/* constant flags */
+	unsigned int		num;	/* # of objs per slab */
+	unsigned int		free_limit; /* upper limit of objects in the lists */
+	spinlock_t		spinlock;
+
+/* 3) cache_grow/shrink */
+	/* order of pgs per slab (2^n) */
+	unsigned int		gfporder;
+
+	/* force GFP flags, e.g. GFP_DMA */
+	unsigned int		gfpflags;
+
+	size_t			colour;		/* cache colouring range */
+	unsigned int		colour_off;	/* colour offset */
+	unsigned int		colour_next;	/* cache colouring */
+	kmem_cache_t		*slabp_cache;
+	unsigned int		slab_size;
+	unsigned int		dflags;		/* dynamic flags */
+
+	/* constructor func */
+	void (*ctor)(void *, kmem_cache_t *, unsigned long);
+
+	/* de-constructor func */
+	void (*dtor)(void *, kmem_cache_t *, unsigned long);
+
+/* 4) cache creation/removal */
+	const char		*name;
+	struct list_head	next;
+
+/* 5) statistics */
+	unsigned long		grown;
+	unsigned long		reaped;
+	unsigned long		shrunk;
+#if SLAB_STATS
+	unsigned long		num_active;
+	unsigned long		num_allocations;
+	unsigned long		high_mark;
+	unsigned long 		errors;
+	unsigned long		max_freeable;
+	atomic_t		allochit;
+	atomic_t		allocmiss;
+	atomic_t		freehit;
+	atomic_t		freemiss;
+#endif
+#if SLAB_DEBUG
+	int			dbghead;
+	int			reallen;
+#endif
+#ifdef CONFIG_USER_RESOURCE
+	unsigned int		objuse;
+#endif
+};
+
+/* Macros for storing/retrieving the cachep and or slab from the
+ * global 'mem_map'. These are used to find the slab an obj belongs to.
+ * With kfree(), these are used to find the cache which an obj belongs to.
+ */
+#define	SET_PAGE_CACHE(pg,x)  ((pg)->lru.next = (struct list_head *)(x))
+#define	GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->lru.next)
+#define	SET_PAGE_SLAB(pg,x)   ((pg)->lru.prev = (struct list_head *)(x))
+#define	GET_PAGE_SLAB(pg)     ((struct slab *)(pg)->lru.prev)
+
+#define CFLGS_OFF_SLAB		(0x80000000UL)
+#define CFLGS_ENVIDS		(0x04000000UL)
+#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
+#define ENVIDS(x)	((x)->flags & CFLGS_ENVIDS)
+
+static inline unsigned int kmem_cache_memusage(kmem_cache_t *cache)
+{
+#ifdef CONFIG_USER_RESOURCE
+	return cache->objuse;
+#else
+	return 0;
+#endif
+}
+
+static inline unsigned int kmem_obj_memusage(void *obj)
+{
+	kmem_cache_t *cachep;
+
+	cachep = GET_PAGE_CACHE(virt_to_page(obj));
+	return kmem_cache_memusage(cachep);
+}
+
+static inline void kmem_mark_nocharge(kmem_cache_t *cachep)
+{
+	cachep->flags |= SLAB_NO_CHARGE;
+}
+
+#endif /* __KMEM_CACHE_H__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/kmem_slab.h linux-2.6.9-ve023stab054/include/linux/kmem_slab.h
--- linux-2.6.9-100.orig/include/linux/kmem_slab.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/kmem_slab.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,69 @@
+#ifndef __KMEM_SLAB_H__
+#define __KMEM_SLAB_H__
+
+/*
+ * kmem_bufctl_t:
+ *
+ * Bufctl's are used for linking objs within a slab
+ * linked offsets.
+ *
+ * This implementation relies on "struct page" for locating the cache &
+ * slab an object belongs to.
+ * This allows the bufctl structure to be small (one int), but limits
+ * the number of objects a slab (not a cache) can contain when off-slab
+ * bufctls are used. The limit is the size of the largest general cache
+ * that does not use off-slab slabs.
+ * For 32bit archs with 4 kB pages, this is 56.
+ * This is not serious, as it is only for large objects, when it is unwise
+ * to have too many per slab.
+ * Note: This limit can be raised by introducing a general cache whose size
+ * is less than 512 (PAGE_SIZE<<3), but greater than 256.
+ */
+
+#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
+#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
+#define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-2)
+
+/*
+ * struct slab
+ *
+ * Manages the objs in a slab. Placed either at the beginning of mem allocated
+ * for a slab, or allocated from an general cache.
+ * Slabs are chained into three lists: fully used, partial, fully free slabs.
+ */
+struct slab {
+	struct list_head	list;
+	unsigned long		colouroff;
+	void			*s_mem;		/* including colour offset */
+	unsigned int		inuse;		/* num of objs active in slab */
+	kmem_bufctl_t		free;
+};
+
+/*
+ * struct slab_rcu
+ *
+ * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
+ * arrange for kmem_freepages to be called via RCU.  This is useful if
+ * we need to approach a kernel structure obliquely, from its address
+ * obtained without the usual locking.  We can lock the structure to
+ * stabilize it and check it's still at the given address, only if we
+ * can be sure that the memory has not been meanwhile reused for some
+ * other kind of object (which our subsystem's lock might corrupt).
+ *
+ * rcu_read_lock before reading the address, then rcu_read_unlock after
+ * taking the spinlock within the structure expected at that address.
+ *
+ * We assume struct slab_rcu can overlay struct slab when destroying.
+ */
+struct slab_rcu {
+	struct rcu_head		head;
+	kmem_cache_t		*cachep;
+	void			*addr;
+};
+
+static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
+{
+	return (kmem_bufctl_t *)(slabp+1);
+}
+
+#endif /* __KMEM_SLAB_H__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/kmod.h linux-2.6.9-ve023stab054/include/linux/kmod.h
--- linux-2.6.9-100.orig/include/linux/kmod.h	2011-06-09 19:22:48.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/kmod.h	2011-06-15 19:26:22.000000000 +0400
@@ -42,8 +42,4 @@ extern int call_usermodehelper(char *pat
 extern void usermodehelper_init(void);
 extern int __exec_usermodehelper(char *path, char **argv, char **envp);
 
-#ifdef CONFIG_HOTPLUG
-extern char hotplug_path [];
-#endif
-
 #endif /* __LINUX_KMOD_H__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/kobject.h linux-2.6.9-ve023stab054/include/linux/kobject.h
--- linux-2.6.9-100.orig/include/linux/kobject.h	2004-10-19 01:53:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/kobject.h	2011-06-15 19:26:22.000000000 +0400
@@ -22,10 +22,14 @@
 #include <linux/sysfs.h>
 #include <linux/rwsem.h>
 #include <linux/kref.h>
+#include <linux/kobject_uevent.h>
 #include <asm/atomic.h>
 
 #define KOBJ_NAME_LEN	20
 
+/* counter to tag the hotplug event, read only except for the kobject core */
+extern u64 hotplug_seqnum;
+
 struct kobject {
 	char			* k_name;
 	char			name[KOBJ_NAME_LEN];
@@ -59,9 +63,7 @@ extern void kobject_unregister(struct ko
 extern struct kobject * kobject_get(struct kobject *);
 extern void kobject_put(struct kobject *);
 
-extern void kobject_hotplug(const char *action, struct kobject *);
-
-extern char * kobject_get_path(struct kset *, struct kobject *, int);
+extern char * kobject_get_path(struct kobject *, int);
 
 struct kobj_type {
 	void (*release)(struct kobject *);
@@ -234,5 +236,19 @@ struct subsys_attribute {
 extern int subsys_create_file(struct subsystem * , struct subsys_attribute *);
 extern void subsys_remove_file(struct subsystem * , struct subsys_attribute *);
 
+#ifdef CONFIG_HOTPLUG
+void kobject_hotplug(struct kobject *kobj, enum kobject_action action);
+int add_hotplug_env_var(char **envp, int num_envp, int *cur_index,
+			char *buffer, int buffer_size, int *cur_len,
+			const char *format, ...)
+	__attribute__((format (printf, 7, 8)));
+#else
+static inline void kobject_hotplug(struct kobject *kobj, enum kobject_action action) { }
+static inline int add_hotplug_env_var(char **envp, int num_envp, int *cur_index, 
+				      char *buffer, int buffer_size, int *cur_len, 
+				      const char *format, ...)
+{ return 0; }
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _KOBJECT_H_ */
diff -Nurap linux-2.6.9-100.orig/include/linux/kobject_uevent.h linux-2.6.9-ve023stab054/include/linux/kobject_uevent.h
--- linux-2.6.9-100.orig/include/linux/kobject_uevent.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/kobject_uevent.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,58 @@
+/*
+ * kobject_uevent.h - list of kobject user events that can be generated
+ *
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#ifndef _KOBJECT_EVENT_H_
+#define _KOBJECT_EVENT_H_
+
+#define HOTPLUG_PATH_LEN	256
+
+/* path to the hotplug userspace helper executed on an event */
+extern char hotplug_path[];
+
+/*
+ * If you add an action here, you must also add the proper string to the
+ * lib/kobject_uevent.c file.
+ */
+typedef int __bitwise kobject_action_t;
+enum kobject_action {
+	KOBJ_ADD	= (__force kobject_action_t) 0x01,	/* add event, for hotplug */
+	KOBJ_REMOVE	= (__force kobject_action_t) 0x02,	/* remove event, for hotplug */
+	KOBJ_CHANGE	= (__force kobject_action_t) 0x03,	/* a sysfs attribute file has changed */
+	KOBJ_MOUNT	= (__force kobject_action_t) 0x04,	/* mount event for block devices */
+	KOBJ_UMOUNT	= (__force kobject_action_t) 0x05,	/* umount event for block devices */
+	KOBJ_START	= (__force kobject_action_t) 0x08,	/* start subsystem */
+	KOBJ_STOP	= (__force kobject_action_t) 0x09,	/* stop subsystem */
+	KOBJ_REBOOT	= (__force kobject_action_t) 0x0a,	/* reboot subsystem */
+};
+
+
+#ifdef CONFIG_KOBJECT_UEVENT
+int kobject_uevent(struct kobject *kobj,
+		   enum kobject_action action,
+		   struct attribute *attr);
+int kobject_uevent_atomic(struct kobject *kobj,
+			  enum kobject_action action,
+			  struct attribute *attr);
+#else
+static inline int kobject_uevent(struct kobject *kobj,
+				 enum kobject_action action,
+				 struct attribute *attr)
+{
+	return 0;
+}
+static inline int kobject_uevent_atomic(struct kobject *kobj,
+				        enum kobject_action action,
+					struct attribute *attr)
+{
+	return 0;
+}
+#endif
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/linux/list.h linux-2.6.9-ve023stab054/include/linux/list.h
--- linux-2.6.9-100.orig/include/linux/list.h	2011-06-09 19:22:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/list.h	2011-06-15 19:26:19.000000000 +0400
@@ -320,6 +320,9 @@ static inline void list_splice_init(stru
 #define list_entry(ptr, type, member) \
 	container_of(ptr, type, member)
 
+#define list_first_entry(ptr, type, member) \
+	container_of((ptr)->next, type, member)
+
 /**
  * list_for_each	-	iterate over a list
  * @pos:	the &struct list_head to use as a loop counter.
@@ -412,6 +418,20 @@ static inline void list_splice_init(stru
 		     prefetch(pos->member.next))
 
 /**
+ * list_for_each_entry_continue_reverse - iterate backwards over list of given
+ *			type continuing after existing point
+ * @pos:	the type * to use as a loop counter.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_continue_reverse(pos, head, member) 	\
+	for (pos = list_entry(pos->member.prev, typeof(*pos), member),	\
+		     prefetch(pos->member.prev);			\
+	     &pos->member != (head);					\
+	     pos = list_entry(pos->member.prev, typeof(*pos), member),	\
+		     prefetch(pos->member.prev))
+
+/**
  * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
  * @pos:	the type * to use as a loop counter.
  * @n:		another type * to use as temporary storage
diff -Nurap linux-2.6.9-100.orig/include/linux/major.h linux-2.6.9-ve023stab054/include/linux/major.h
--- linux-2.6.9-100.orig/include/linux/major.h	2004-10-19 01:53:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/major.h	2011-06-15 19:26:19.000000000 +0400
@@ -165,4 +165,7 @@
 
 #define VIOTAPE_MAJOR		230
 
+#define UNNAMED_EXTRA_MAJOR		130
+#define UNNAMED_EXTRA_MAJOR_COUNT	120
+
 #endif
diff -Nurap linux-2.6.9-100.orig/include/linux/mm.h linux-2.6.9-ve023stab054/include/linux/mm.h
--- linux-2.6.9-100.orig/include/linux/mm.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/mm.h	2011-06-15 19:26:19.000000000 +0400
@@ -209,6 +209,9 @@ typedef unsigned long page_flags_t;
  * moment. Note that we have no way to track which tasks are using
  * a page.
  */
+struct user_beancounter;
+struct page_beancounter;
+
 struct page {
 	page_flags_t flags;		/* Atomic flags, some possibly
 					 * updated asynchronously */
@@ -247,6 +250,10 @@ struct page {
 	void *virtual;			/* Kernel virtual address (NULL if
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
+	union {
+		struct user_beancounter	*page_ub;
+		struct page_beancounter *page_pbc;
+	} bc;
 };
 
 /*
@@ -545,10 +552,8 @@ struct page *shmem_nopage(struct vm_area
 int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
 struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
 					unsigned long addr);
-int shmem_lock(struct file *file, int lock, struct user_struct *user);
 #else
 #define shmem_nopage filemap_nopage
-#define shmem_lock(a, b, c) 	({0;})	/* always in memory, no need to lock */
 #define shmem_set_policy(a, b)	(0)
 #define shmem_get_policy(a, b)	(NULL)
 #endif
@@ -794,6 +799,7 @@ extern struct vm_area_struct *find_exten
 extern struct page * vmalloc_to_page(void *addr);
 extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
 		int write);
+extern struct page * follow_page_k(unsigned long address, int write);
 extern struct page * follow_page_pte(struct mm_struct *mm,
 		unsigned long address, int write, pte_t *pte);
 extern int remap_page_range(struct vm_area_struct *vma, unsigned long from,
@@ -839,5 +845,25 @@ extern struct vm_area_struct *get_gate_v
 int in_gate_area(struct task_struct *task, unsigned long addr);
 #endif
 
+/*
+ * Common MM functions for inclusion in the VFS
+ * or in other stackable file systems.  Some of these
+ * functions were in linux/mm/ C files.
+ *
+ */
+static inline int sync_page(struct page *page)
+{
+	struct address_space *mapping;
+
+	/*
+	 * FIXME, fercrissake.  What is this barrier here for?
+	 */
+	smp_mb();
+	mapping = page_mapping(page);
+	if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
+		return mapping->a_ops->sync_page(page);
+	return 0;
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/module.h linux-2.6.9-ve023stab054/include/linux/module.h
--- linux-2.6.9-100.orig/include/linux/module.h	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/module.h	2011-06-15 19:26:20.000000000 +0400
@@ -188,7 +188,7 @@ void *__symbol_get_gpl(const char *symbo
 	__EXPORT_SYMBOL(sym, "")
 
 #define EXPORT_SYMBOL_GPL(sym)					\
-	__EXPORT_SYMBOL(sym, "_gpl")
+	__EXPORT_SYMBOL(sym, "")
 
 #endif
 
diff -Nurap linux-2.6.9-100.orig/include/linux/mount.h linux-2.6.9-ve023stab054/include/linux/mount.h
--- linux-2.6.9-100.orig/include/linux/mount.h	2004-10-19 01:53:11.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/mount.h	2011-06-15 19:26:19.000000000 +0400
@@ -63,7 +63,7 @@ static inline void mntput(struct vfsmoun
 
 extern void free_vfsmnt(struct vfsmount *mnt);
 extern struct vfsmount *alloc_vfsmnt(const char *name);
-extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
+extern struct vfsmount *do_kern_mount(struct file_system_type *type, int flags,
 				      const char *name, void *data);
 
 struct nameidata;
diff -Nurap linux-2.6.9-100.orig/include/linux/msg.h linux-2.6.9-ve023stab054/include/linux/msg.h
--- linux-2.6.9-100.orig/include/linux/msg.h	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/msg.h	2011-06-15 19:26:21.000000000 +0400
@@ -91,6 +91,14 @@ struct msg_queue {
 	struct list_head q_senders;
 };
 
+int sysvipc_walk_msg(int (*func)(int, struct msg_queue*, void *), void *arg);
+int sysvipc_setup_msg(key_t key, int msqid, int msgflg);
+int sysv_msg_store(struct msg_msg *msg,
+		   int (*store)(void * src, int len, int offset, void * data),
+		   int len, void * data);
+struct msg_msg *sysv_msg_load(int (*load)(void * dst, int len, int offset,
+					  void * data), int len, void * data);
+
 /* Helper routines for sys_msgsnd and sys_msgrcv */
 extern long do_msgsnd(int msqid, long mtype, void __user *mtext,
 			size_t msgsz, int msgflg);
diff -Nurap linux-2.6.9-100.orig/include/linux/namei.h linux-2.6.9-ve023stab054/include/linux/namei.h
--- linux-2.6.9-100.orig/include/linux/namei.h	2011-06-09 19:22:59.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/namei.h	2011-06-15 19:26:19.000000000 +0400
@@ -45,7 +45,10 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
 #define LOOKUP_CONTINUE		 4
 #define LOOKUP_PARENT		16
 #define LOOKUP_NOALT		32
-#define LOOKUP_ATOMIC		64
+/* VvS: to fix O_ATOMICLOOKUP incompatibility with mainstream
+ * #define LOOKUP_ATOMIC		64
+ */
+#define LOOKUP_ATOMIC		0
 #define LOOKUP_REVAL		128
 
 /*
@@ -54,6 +57,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
 #define LOOKUP_OPEN		(0x0100)
 #define LOOKUP_CREATE		(0x0200)
 #define LOOKUP_ACCESS		(0x0400)
+#define LOOKUP_NOAREACHECK	(0x1000)	/* no area check on lookup */
+#define LOOKUP_STRICT		(0x2000)	/* no symlinks or other filesystems */
 
 extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
 #define user_path_walk(name,nd) \
diff -Nurap linux-2.6.9-100.orig/include/linux/netdevice.h linux-2.6.9-ve023stab054/include/linux/netdevice.h
--- linux-2.6.9-100.orig/include/linux/netdevice.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netdevice.h	2011-06-15 19:26:22.000000000 +0400
@@ -37,6 +37,7 @@
 #include <linux/config.h>
 #include <linux/device.h>
 #include <linux/percpu.h>
+#include <linux/ctype.h>
 
 struct divert_blk;
 struct vlan_group;
@@ -253,6 +254,11 @@ struct netdev_boot_setup {
 };
 #define NETDEV_BOOT_SETUP_MAX 8
 
+struct netdev_bc {
+	struct user_beancounter *exec_ub, *owner_ub;
+};
+
+#define netdev_bc(dev)		(&(dev)->dev_bc)
 
 /*
  *	The DEVICE structure.
@@ -346,6 +352,7 @@ struct net_device
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 	unsigned char		dev_addr[MAX_ADDR_LEN];	/* hw address	*/
 	unsigned char		addr_len;	/* hardware address length	*/
+	unsigned char		is_leaked;	/* if we decided to leak it	*/
 #ifndef __GENKSYMS__
 	/*
 	 * Private data size is limited to 64kB
@@ -405,6 +412,7 @@ struct net_device
 	enum { NETREG_UNINITIALIZED=0,
 	       NETREG_REGISTERING,	/* called register_netdevice */
 	       NETREG_REGISTERED,	/* completed register todo */
+	       NETREG_REGISTER_ERR,	/* register todo failed */
 	       NETREG_UNREGISTERING,	/* called unregister_netdevice */
 	       NETREG_UNREGISTERED,	/* completed unregister todo */
 	       NETREG_RELEASED,		/* called free_netdev */
@@ -424,6 +432,8 @@ struct net_device
 #define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
 #define NETIF_F_TSO		2048	/* Can offload TCP/IP segmentation */
 #define NETIF_F_LLTX		4096	/* LockLess TX */
+#define NETIF_F_VIRTUAL		0x40000000 /* can be registered in ve */
+#define NETIF_F_VENET		0x80000000 /* Device is VENET device */
 
 	/* Called after device is detached from network. */
 	void			(*uninit)(struct net_device *dev);
@@ -493,10 +503,16 @@ struct net_device
 	struct divert_blk	*divert;
 #endif /* CONFIG_NET_DIVERT */
 
+	struct ve_struct	*owner_env; /* Owner VE of the interface */
+	struct netdev_bc        dev_bc;
+
 	/* class/net/name entry */
 	struct class_device	class_dev;
 	/* how much padding had been added by alloc_netdev() */
 	int padded;
+
+	/* List entry in global devices list to keep track of their names assignment */
+	struct list_head	dev_global_list_entry;
 };
 
 /*
@@ -548,8 +564,21 @@ struct packet_type {
 
 extern struct net_device		loopback_dev;		/* The loopback */
 extern struct net_device		*dev_base;		/* All devices */
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define visible_loopback_dev	(*get_exec_env()->_loopback_dev)
+#define dev_base		(get_exec_env()->_net_dev_base)
+#define visible_dev_head(x)	(&(x)->_net_dev_head)
+#define visible_dev_index_head(x) (&(x)->_net_dev_index_head)
+#else
+#define visible_loopback_dev	loopback_dev
+#define visible_dev_head(x)	NULL
+#define visible_dev_index_head(x) NULL
+#endif
 extern rwlock_t				dev_base_lock;		/* Device list lock */
 
+struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env);
+struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env);
+
 extern int			netdev_boot_setup_add(char *name, struct ifmap *map);
 extern int 			netdev_boot_setup_check(struct net_device *dev);
 extern unsigned long		netdev_boot_base(const char *prefix, int unit);
@@ -569,11 +598,14 @@ extern int		dev_close(struct net_device 
 extern int		dev_queue_xmit(struct sk_buff *skb);
 extern int		register_netdevice(struct net_device *dev);
 extern int		unregister_netdevice(struct net_device *dev);
+extern void		netdevice_notify(int event, struct net_device *dev);
 extern void		free_netdev(struct net_device *dev);
 extern void		synchronize_net(void);
 extern int 		register_netdevice_notifier(struct notifier_block *nb);
 extern int		unregister_netdevice_notifier(struct notifier_block *nb);
 extern int		call_netdevice_notifiers(unsigned long val, void *v);
+extern int		dev_new_index(struct net_device *dev);
+extern void		dev_free_index(struct net_device *dev);
 extern struct net_device	*dev_get_by_index(int ifindex);
 extern struct net_device	*__dev_get_by_index(int ifindex);
 extern int		dev_restart(struct net_device *dev);
@@ -1025,6 +1057,18 @@ static inline int skb_bond_should_drop(s
 	return 0;
 }
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+static inline int ve_is_dev_movable(struct net_device *dev)
+{
+	return !(dev->features & NETIF_F_VIRTUAL);
+}
+#else
+static inline int ve_is_dev_movable(struct net_device *dev)
+{
+	return 0;
+}
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif	/* _LINUX_DEV_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter.h linux-2.6.9-ve023stab054/include/linux/netfilter.h
--- linux-2.6.9-100.orig/include/linux/netfilter.h	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter.h	2011-06-15 19:26:19.000000000 +0400
@@ -25,6 +25,8 @@
 #define NFC_UNKNOWN 0x4000
 #define NFC_ALTERED 0x8000
 
+#define NFC_IPT_MASK (0x00FFFFFF)
+
 #ifdef __KERNEL__
 #include <linux/config.h>
 #ifdef CONFIG_NETFILTER
@@ -93,6 +95,9 @@ struct nf_info
 int nf_register_hook(struct nf_hook_ops *reg);
 void nf_unregister_hook(struct nf_hook_ops *reg);
 
+int visible_nf_register_hook(struct nf_hook_ops *reg);
+int visible_nf_unregister_hook(struct nf_hook_ops *reg);
+
 /* Functions to register get/setsockopt ranges (non-inclusive).  You
    need to check permissions yourself! */
 int nf_register_sockopt(struct nf_sockopt_ops *reg);
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_conntrack.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_conntrack.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_conntrack.h	2011-06-15 19:26:20.000000000 +0400
@@ -166,6 +166,11 @@ struct ip_conntrack_counter
 
 struct ip_conntrack_helper;
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/ve.h>
+#include <linux/ve_owner.h>
+#endif
+
 struct ip_conntrack
 {
 	/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
@@ -223,11 +228,21 @@ struct ip_conntrack
 	u_int32_t last_ack;
 	u_int32_t last_win;
 #endif
+#ifdef CONFIG_VE_IPTABLES
+        struct ve_struct *ct_owner_env;
+#endif
 };
 
+#ifdef CONFIG_VE_IPTABLES
+DCL_VE_OWNER_PROTO(CT, , struct ip_conntrack, ct_owner_env, , )
+#endif
+
 /* get master conntrack via master expectation */
 #define master_ct(conntr) (conntr->master ? conntr->master->expectant : NULL)
 
+/* add conntrack entry to hash tables */
+extern void ip_conntrack_hash_insert(struct ip_conntrack *ct);
+
 /* Alter reply tuple (maybe alter helper).  If it's already taken,
    return 0 and don't do alteration. */
 extern int
@@ -251,10 +266,17 @@ ip_conntrack_get(const struct sk_buff *s
 /* decrement reference count on a conntrack */
 extern inline void ip_conntrack_put(struct ip_conntrack *ct);
 
+/* allocate conntrack structure */
+extern struct ip_conntrack *ip_conntrack_alloc(struct user_beancounter *ub);
+
 /* find unconfirmed expectation based on tuple */
 struct ip_conntrack_expect *
 ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple);
 
+/* insert expectation into lists */
+void ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
+				struct ip_conntrack *related_to);
+
 /* decrement reference count on an expectation */
 void ip_conntrack_expect_put(struct ip_conntrack_expect *exp);
 
@@ -279,7 +301,15 @@ extern int ip_conntrack_tcp_update(struc
 				   int dir);
 
 /* Call me when a conntrack is destroyed. */
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_conntrack_destroyed	\
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_destroyed)
+#else
 extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
+#define ve_ip_conntrack_destroyed	ip_conntrack_destroyed
+#endif
+
 
 /* Fake conntrack entry for untracked connections */
 extern struct ip_conntrack ip_conntrack_untracked;
@@ -304,6 +334,7 @@ static inline int is_confirmed(struct ip
 }
 
 extern unsigned int ip_conntrack_htable_size;
+extern int ip_conntrack_disable_ve0;
  
 struct ip_conntrack_stat
 {
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_conntrack_core.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_conntrack_core.h	2011-06-15 19:26:19.000000000 +0400
@@ -50,8 +50,22 @@ static inline int ip_conntrack_confirm(s
 	return NF_ACCEPT;
 }
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_conntrack_hash	\
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_hash)
+#define ve_ip_conntrack_expect_list \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_expect_list)
+#define ve_ip_conntrack_vmalloc \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_vmalloc)
+#else
 extern struct list_head *ip_conntrack_hash;
 extern struct list_head ip_conntrack_expect_list;
+#define ve_ip_conntrack_hash		ip_conntrack_hash
+#define ve_ip_conntrack_expect_list	ip_conntrack_expect_list
+#define ve_ip_conntrack_vmalloc		ip_conntrack_vmalloc
+#endif /* CONFIG_VE_IPTABLES */
+
 DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
 DECLARE_RWLOCK_EXTERN(ip_conntrack_expect_tuple_lock);
 #endif /* _IP_CONNTRACK_CORE_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_conntrack_helper.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h	2004-10-19 01:53:22.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_conntrack_helper.h	2011-06-15 19:26:19.000000000 +0400
@@ -36,6 +36,9 @@ extern void ip_conntrack_helper_unregist
 extern struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple);
 
 
+extern int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *);
+extern void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
+
 /* Allocate space for an expectation: this is mandatory before calling 
    ip_conntrack_expect_related. */
 extern struct ip_conntrack_expect *ip_conntrack_expect_alloc(void);
@@ -46,4 +49,5 @@ extern int ip_conntrack_change_expect(st
 				      struct ip_conntrack_tuple *newtuple);
 extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp);
 
+extern struct list_head helpers;
 #endif /*_IP_CONNTRACK_HELPER_H*/
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h	2004-10-19 01:55:27.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_conntrack_protocol.h	2011-06-15 19:26:19.000000000 +0400
@@ -56,7 +56,14 @@ struct ip_conntrack_protocol
 };
 
 #define MAX_IP_CT_PROTO 256
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_ct_protos \
+	(get_exec_env()->_ip_conntrack->_ip_ct_protos)
+#else
 extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
+#define ve_ip_ct_protos			ip_ct_protos
+#endif /* CONFIG_VE_IPTABLES */
 
 /* Protocol registration. */
 extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto);
@@ -64,7 +71,7 @@ extern void ip_conntrack_protocol_unregi
 
 static inline struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
 {
-	return ip_ct_protos[protocol];
+	return ve_ip_ct_protos[protocol];
 }
 
 /* Existing built-in protocols */
@@ -74,16 +81,51 @@ extern struct ip_conntrack_protocol ip_c
 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
 extern int ip_conntrack_protocol_tcp_init(void);
 
+#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_SYSCTL)
+#include <linux/sched.h>
+#define ve_ip_ct_tcp_timeouts \
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeouts)
+#define ve_ip_ct_udp_timeout \
+	(get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout)
+#define ve_ip_ct_udp_timeout_stream \
+	(get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout_stream)
+#define ve_ip_ct_icmp_timeout \
+	(get_exec_env()->_ip_conntrack->_ip_ct_icmp_timeout)
+#define ve_ip_ct_generic_timeout \
+	(get_exec_env()->_ip_conntrack->_ip_ct_generic_timeout)
+#define ve_ip_ct_log_invalid	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_log_invalid)
+#define ve_ip_ct_tcp_timeout_max_retrans \
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeout_max_retrans)
+#define ve_ip_ct_tcp_loose	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_loose)
+#define ve_ip_ct_tcp_be_liberal	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_be_liberal)
+#define ve_ip_ct_tcp_max_retrans	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_max_retrans)
+#else
+#define ve_ip_ct_tcp_timeouts		*tcp_timeouts
+#define ve_ip_ct_udp_timeout		ip_ct_udp_timeout
+#define ve_ip_ct_udp_timeout_stream	ip_ct_udp_timeout_stream
+#define ve_ip_ct_icmp_timeout		ip_ct_icmp_timeout
+#define ve_ip_ct_generic_timeout	ip_ct_generic_timeout
+#define ve_ip_ct_log_invalid		ip_ct_log_invalid
+#define ve_ip_ct_tcp_timeout_max_retrans ip_ct_tcp_timeout_max_retrans
+#define ve_ip_ct_tcp_loose		ip_ct_tcp_loose
+#define ve_ip_ct_tcp_be_liberal		ip_ct_tcp_be_liberal
+#define ve_ip_ct_tcp_max_retrans	ip_ct_tcp_max_retrans
+#endif
+
 /* Log invalid packets */
 extern unsigned int ip_ct_log_invalid;
 
 #ifdef CONFIG_SYSCTL
 #ifdef DEBUG_INVALID_PACKETS
 #define LOG_INVALID(proto) \
-	(ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW)
+	(ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW)
 #else
 #define LOG_INVALID(proto) \
-	((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \
+	((ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW) \
 	 && net_ratelimit())
 #endif
 #else
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_nat.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_nat.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_nat.h	2004-10-19 01:55:29.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_nat.h	2011-06-15 19:26:22.000000000 +0400
@@ -1,5 +1,6 @@
 #ifndef _IP_NAT_H
 #define _IP_NAT_H
+#include <linux/config.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
 
@@ -55,6 +56,23 @@ struct ip_nat_multi_range
 	struct ip_nat_range range[1];
 };
 
+#ifdef CONFIG_COMPAT
+#include <net/compat.h>
+
+struct compat_ip_nat_range
+{
+	compat_uint_t flags;
+	u_int32_t min_ip, max_ip;
+	union ip_conntrack_manip_proto min, max;
+};
+
+struct compat_ip_nat_multi_range
+{
+	compat_uint_t rangesize;
+	struct compat_ip_nat_range range[1];
+};
+#endif
+
 /* Worst case: local-out manip + 1 post-routing, and reverse dirn. */
 #define IP_NAT_MAX_MANIPS (2*3)
 
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_nat_core.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_nat_core.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_nat_core.h	2004-10-19 01:53:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_nat_core.h	2011-06-15 19:26:20.000000000 +0400
@@ -23,5 +23,6 @@ extern void replace_in_hashes(struct ip_
 			      struct ip_nat_info *info);
 extern void place_in_hashes(struct ip_conntrack *conntrack,
 			    struct ip_nat_info *info);
+extern int ip_nat_install_conntrack(struct ip_conntrack *conntrack, int helper);
 
 #endif /* _IP_NAT_CORE_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_nat_helper.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_nat_helper.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_nat_helper.h	2004-10-19 01:53:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_nat_helper.h	2011-06-15 19:26:19.000000000 +0400
@@ -40,6 +40,8 @@ struct ip_nat_helper
 
 extern int ip_nat_helper_register(struct ip_nat_helper *me);
 extern void ip_nat_helper_unregister(struct ip_nat_helper *me);
+extern int visible_ip_nat_helper_register(struct ip_nat_helper *me);
+extern void visible_ip_nat_helper_unregister(struct ip_nat_helper *me);
 
 extern struct ip_nat_helper *
 ip_nat_find_helper(const struct ip_conntrack_tuple *tuple);
@@ -65,4 +67,6 @@ extern int ip_nat_mangle_udp_packet(stru
 extern int ip_nat_seq_adjust(struct sk_buff **pskb, 
 			     struct ip_conntrack *ct, 
 			     enum ip_conntrack_info ctinfo);
+
+extern void init_ip_nat_helpers(void);
 #endif
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_nat_protocol.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_nat_protocol.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_nat_protocol.h	2004-10-19 01:54:31.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_nat_protocol.h	2011-06-15 19:26:19.000000000 +0400
@@ -46,7 +46,14 @@ struct ip_nat_protocol
 };
 
 #define MAX_IP_NAT_PROTO 256
+
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_nat_protos (get_exec_env()->_ip_conntrack->_ip_nat_protos)
+#else
 extern struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
+#define ve_ip_nat_protos	ip_nat_protos
+#endif /* CONFIG_VE_IPTABLES */
 
 /* Protocol registration. */
 extern int ip_nat_protocol_register(struct ip_nat_protocol *proto);
@@ -54,7 +61,7 @@ extern void ip_nat_protocol_unregister(s
 
 static inline struct ip_nat_protocol *ip_nat_find_proto(u_int8_t protocol)
 {
-	return ip_nat_protos[protocol];
+	return ve_ip_nat_protos[protocol];
 }
 
 /* Built-in protocols. */
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_nat_rule.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_nat_rule.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_nat_rule.h	2004-10-19 01:55:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_nat_rule.h	2011-06-15 19:26:19.000000000 +0400
@@ -6,7 +6,7 @@
 
 #ifdef __KERNEL__
 
-extern int ip_nat_rule_init(void) __init;
+extern int ip_nat_rule_init(void);
 extern void ip_nat_rule_cleanup(void);
 extern int ip_nat_rule_find(struct sk_buff **pskb,
 			    unsigned int hooknum,
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_tables.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ip_tables.h	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ip_tables.h	2011-06-15 19:26:22.000000000 +0400
@@ -16,6 +16,7 @@
 #define _IPTABLES_H
 
 #ifdef __KERNEL__
+#include <linux/config.h>
 #include <linux/if.h>
 #include <linux/types.h>
 #include <linux/in.h>
@@ -340,6 +341,12 @@ ipt_get_target(struct ipt_entry *e)
 #include <linux/init.h>
 extern void ipt_init(void) __init;
 
+#ifdef CONFIG_COMPAT
+#define COMPAT_TO_USER		1
+#define COMPAT_FROM_USER	-1
+#define COMPAT_CALC_SIZE	0
+#endif
+
 struct ipt_match
 {
 	struct list_head list;
@@ -369,6 +376,9 @@ struct ipt_match
 	/* Called when entry of this type deleted. */
 	void (*destroy)(void *matchinfo, unsigned int matchinfosize);
 
+#ifdef CONFIG_COMPAT
+	int (*compat)(void *match, void **dstptr, int *size, int convert);
+#endif
 	/* Set this to THIS_MODULE. */
 	struct module *me;
 };
@@ -403,6 +413,9 @@ struct ipt_target
 			       const void *targinfo,
 			       void *userdata);
 
+#ifdef CONFIG_COMPAT
+	int (*compat)(void *target, void **dstptr, int *size, int convert);
+#endif
 	/* Set this to THIS_MODULE. */
 	struct module *me;
 };
@@ -410,9 +423,15 @@ struct ipt_target
 extern int ipt_register_target(struct ipt_target *target);
 extern void ipt_unregister_target(struct ipt_target *target);
 
+extern int visible_ipt_register_target(struct ipt_target *target);
+extern void visible_ipt_unregister_target(struct ipt_target *target);
+
 extern int ipt_register_match(struct ipt_match *match);
 extern void ipt_unregister_match(struct ipt_match *match);
 
+extern int visible_ipt_register_match(struct ipt_match *match);
+extern void visible_ipt_unregister_match(struct ipt_match *match);
+
 /* Furniture shopping... */
 struct ipt_table
 {
@@ -447,5 +466,75 @@ extern unsigned int ipt_do_table(struct 
 				 void *userdata);
 
 #define IPT_ALIGN(s) (((s) + (__alignof__(struct ipt_entry)-1)) & ~(__alignof__(struct ipt_entry)-1))
+
+#ifdef CONFIG_COMPAT
+#include <net/compat.h>
+
+struct compat_ipt_counters
+{
+	u_int32_t cnt[4];
+};
+
+struct compat_ipt_counters_info
+{
+	char name[IPT_TABLE_MAXNAMELEN];
+	compat_uint_t num_counters;
+	struct compat_ipt_counters counters[0];
+};
+
+struct compat_ipt_getinfo
+{
+	char name[IPT_TABLE_MAXNAMELEN];
+	compat_uint_t valid_hooks;
+	compat_uint_t hook_entry[NF_IP_NUMHOOKS];
+	compat_uint_t underflow[NF_IP_NUMHOOKS];
+	compat_uint_t num_entries;
+	compat_uint_t size;
+};
+
+struct compat_ipt_entry
+{
+	struct ipt_ip ip;
+	compat_uint_t nfcache;
+	u_int16_t target_offset;
+	u_int16_t next_offset;
+	compat_uint_t comefrom;
+	struct compat_ipt_counters counters;
+	unsigned char elems[0];
+};
+
+struct compat_ipt_entry_match
+{
+	union {
+		struct {
+			u_int16_t match_size;
+			char name[IPT_FUNCTION_MAXNAMELEN];
+		} user;
+		u_int16_t match_size;
+	} u;
+	unsigned char data[0];
+};
+
+struct compat_ipt_entry_target
+{
+	union {
+		struct {
+			u_int16_t target_size;
+			char name[IPT_FUNCTION_MAXNAMELEN];
+		} user;
+		u_int16_t target_size;
+	} u;
+	unsigned char data[0];
+};
+
+#define COMPAT_IPT_ALIGN(s) (((s) + (__alignof__(struct compat_ipt_entry)-1)) \
+		& ~(__alignof__(struct compat_ipt_entry)-1))
+
+extern int ipt_match_align_compat(void *match, void **dstptr,
+		int *size, int off, int convert);
+extern int ipt_target_align_compat(void *target, void **dstptr,
+		int *size, int off, int convert);
+
+#endif /* CONFIG_COMPAT */
 #endif /*__KERNEL__*/
 #endif /* _IPTABLES_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ipt_conntrack.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ipt_conntrack.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ipt_conntrack.h	2004-10-19 01:54:54.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ipt_conntrack.h	2011-06-15 19:26:22.000000000 +0400
@@ -5,6 +5,8 @@
 #ifndef _IPT_CONNTRACK_H
 #define _IPT_CONNTRACK_H
 
+#include <linux/config.h>
+
 #define IPT_CONNTRACK_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1))
 #define IPT_CONNTRACK_STATE_INVALID (1 << 0)
 
@@ -36,4 +38,21 @@ struct ipt_conntrack_info
 	/* Inverse flags */
 	u_int8_t invflags;
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_ipt_conntrack_info
+{
+	compat_uint_t statemask, statusmask;
+
+	struct ip_conntrack_tuple tuple[IP_CT_DIR_MAX];
+	struct in_addr sipmsk[IP_CT_DIR_MAX], dipmsk[IP_CT_DIR_MAX];
+
+	compat_ulong_t expires_min, expires_max;
+
+	/* Flags word */
+	u_int8_t flags;
+	/* Inverse flags */
+	u_int8_t invflags;
+};
+#endif
 #endif /*_IPT_CONNTRACK_H*/
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ipt_helper.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ipt_helper.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ipt_helper.h	2004-10-19 01:54:08.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ipt_helper.h	2011-06-15 19:26:22.000000000 +0400
@@ -1,8 +1,17 @@
 #ifndef _IPT_HELPER_H
 #define _IPT_HELPER_H
 
+#include <linux/config.h>
+
 struct ipt_helper_info {
 	int invert;
 	char name[30];
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_ipt_helper_info {
+	compat_int_t invert;
+	char name[30];
+};
+#endif
 #endif /* _IPT_HELPER_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ipt_limit.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ipt_limit.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ipt_limit.h	2004-10-19 01:54:32.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ipt_limit.h	2011-06-15 19:26:22.000000000 +0400
@@ -1,6 +1,8 @@
 #ifndef _IPT_RATE_H
 #define _IPT_RATE_H
 
+#include <linux/config.h>
+
 /* timings are in milliseconds. */
 #define IPT_LIMIT_SCALE 10000
 
@@ -18,4 +20,20 @@ struct ipt_rateinfo {
 	/* Ugly, ugly fucker. */
 	struct ipt_rateinfo *master;
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_ipt_rateinfo {
+	u_int32_t avg;    /* Average secs between packets * scale */
+	u_int32_t burst;  /* Period multiplier for upper limit. */
+
+	/* Used internally by the kernel */
+	compat_ulong_t prev;
+	u_int32_t credit;
+	u_int32_t credit_cap, cost;
+
+	/* Ugly, ugly fucker. */
+	compat_uptr_t master;
+};
+#endif
+
 #endif /*_IPT_RATE_H*/
diff -Nurap linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ipt_state.h linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ipt_state.h
--- linux-2.6.9-100.orig/include/linux/netfilter_ipv4/ipt_state.h	2004-10-19 01:55:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netfilter_ipv4/ipt_state.h	2011-06-15 19:26:22.000000000 +0400
@@ -1,6 +1,8 @@
 #ifndef _IPT_STATE_H
 #define _IPT_STATE_H
 
+#include <linux/config.h>
+
 #define IPT_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1))
 #define IPT_STATE_INVALID (1 << 0)
 
@@ -10,4 +12,11 @@ struct ipt_state_info
 {
 	unsigned int statemask;
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_ipt_state_info
+{
+	compat_uint_t statemask;
+};
+#endif
 #endif /*_IPT_STATE_H*/
diff -Nurap linux-2.6.9-100.orig/include/linux/netlink.h linux-2.6.9-ve023stab054/include/linux/netlink.h
--- linux-2.6.9-100.orig/include/linux/netlink.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/netlink.h	2011-06-15 19:26:22.000000000 +0400
@@ -17,7 +17,10 @@
 #define NETLINK_ROUTE6		11	/* af_inet6 route comm channel */
 #define NETLINK_IP6_FW		13
 #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
-#define NETLINK_TAPBASE		16	/* 16 to 31 are ethertap */
+#define NETLINK_KOBJECT_UEVENT	15	/* Kernel messages to userspace */
+#define NETLINK_TAPBASE		16	/* NETLINK_TAPBASE to NETLINK_TAPLAST are ethertap */
+#define NETLINK_TAPLAST		30	/* NETLINK_TAPBASE to NETLINK_TAPLAST are ethertap */
+#define NETLINK_VZEVENT		31	/* VZ events */
 
 #define MAX_LINKS 32		
 
@@ -101,6 +104,20 @@ enum {
 #include <linux/capability.h>
 #include <linux/skbuff.h>
 
+struct netlink_opt
+{
+	u32			pid;
+	unsigned		groups;
+	u32			dst_pid;
+	unsigned		dst_groups;
+	unsigned long		state;
+	int			(*handler)(int unit, struct sk_buff *skb);
+	wait_queue_head_t	wait;
+	struct netlink_callback	*cb;
+	spinlock_t		cb_lock;
+	void			(*data_ready)(struct sock *sk, int bytes);
+};
+
 struct netlink_skb_parms
 {
 	struct ucred		creds;		/* Skb credentials	*/
@@ -131,7 +148,7 @@ extern int netlink_unregister_notifier(s
 /* finegrained unicast helpers: */
 struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid);
 struct sock *netlink_getsockbyfilp(struct file *filp);
-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo);
+int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo, struct sock *ssk);
 void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
 int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol);
 
diff -Nurap linux-2.6.9-100.orig/include/linux/nfcalls.h linux-2.6.9-ve023stab054/include/linux/nfcalls.h
--- linux-2.6.9-100.orig/include/linux/nfcalls.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/nfcalls.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,222 @@
+/*
+ *  include/linux/nfcalls.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_NFCALLS_H
+#define _LINUX_NFCALLS_H
+
+#include <linux/rcupdate.h>
+
+#ifdef CONFIG_MODULES
+extern struct module no_module;
+
+#define DECL_KSYM_MODULE(name)				\
+	extern struct module *vz_mod_##name
+
+#define INIT_KSYM_MODULE(name)				\
+	struct module *vz_mod_##name = &no_module;	\
+	EXPORT_SYMBOL(vz_mod_##name)
+
+static inline void __vzksym_modresolve(struct module **modp, struct module *mod)
+{
+	/*
+	 * we want to be sure that pointer updates are visible first:
+	 * 1. wmb() is here only to be on the safe side
+	 *    (note, no rmb() in KSYMSAFECALL)
+	 * 2. synchronize_kernel() guarantees that updates are visible
+	 *    on all cpus and allows us to remove rmb() in KSYMSAFECALL
+	 */
+	wmb(); synchronize_kernel();
+	*modp = mod;
+	/* just to be sure, our changes are visible as soon as possible */
+	wmb(); synchronize_kernel();
+}
+
+static inline void __vzksym_modunresolve(struct module **modp)
+{
+	/*
+	 * try_module_get() in KSYMSAFECALL should fail at this moment since
+	 * THIS_MODULE is in unloading state (we should be called from fini),
+	 * no need to synchronize pointers/ve_module updates.
+	 */
+	*modp = &no_module;
+	/*
+	 * synchronize_kernel() guarantees here that we see
+	 * updated module pointer before the module really gets away
+	 */
+	synchronize_kernel();
+}
+
+static inline int __vzksym_module_get(struct module *mod)
+{
+	/*
+	 * we want to avoid rmb(), so use synchronize_kernel() in KSYMUNRESOLVE
+	 * and smp_read_barrier_depends() here...
+	 */
+	smp_read_barrier_depends(); /* for module loading */
+	if (!try_module_get(mod))
+		return -EBUSY;
+
+	return 0;
+}
+
+static inline void __vzksym_module_put(struct module *mod)
+{
+	module_put(mod);
+}
+#else
+#define DECL_KSYM_MODULE(name)
+#define INIT_KSYM_MODULE(name)
+#define __vzksym_modresolve(modp, mod)
+#define __vzksym_modunresolve(modp)
+#define __vzksym_module_get(mod)			(0)
+#define __vzksym_module_put(mod)
+#endif
+
+#define __KSYMERRCALL(err, type, mod, name, args)	\
+({							\
+	type ret = (type)err;				\
+	if (!__vzksym_module_get(vz_mod_##mod))	{	\
+		if (vz_##name)				\
+			ret = ((*vz_##name)args); 	\
+		__vzksym_module_put(vz_mod_##mod);	\
+	}						\
+	ret;						\
+})
+#define __KSYMSAFECALL_VOID(mod, name, args)		\
+do {							\
+	if (!__vzksym_module_get(vz_mod_##mod)) {	\
+		if (vz_##name)				\
+			((*vz_##name)args); 		\
+		__vzksym_module_put(vz_mod_##mod);	\
+	}						\
+} while (0)
+
+#define DECL_KSYM_CALL(type, name, args)		\
+	extern type (*vz_##name) args
+#define INIT_KSYM_CALL(type, name, args)		\
+	type (*vz_##name) args;				\
+	EXPORT_SYMBOL(vz_##name)
+
+#define KSYMERRCALL(err, mod, name, args)		\
+	__KSYMERRCALL(err, int, mod, name, args)
+#define KSYMSAFECALL(type, mod, name, args)		\
+	__KSYMERRCALL(0, type, mod, name, args)
+#define KSYMSAFECALL_VOID(mod, name, args)		\
+	__KSYMSAFECALL_VOID(mod, name, args)
+#define KSYMREF(name)					vz_##name
+
+/* should be called _after_ KSYMRESOLVE's */
+#define KSYMMODRESOLVE(name)				\
+	__vzksym_modresolve(&vz_mod_##name, THIS_MODULE)
+#define KSYMMODUNRESOLVE(name)				\
+	__vzksym_modunresolve(&vz_mod_##name)
+
+#define KSYMRESOLVE(name)				\
+	vz_##name = &name
+#define KSYMUNRESOLVE(name)				\
+	vz_##name = NULL
+
+#if defined(CONFIG_VE_IPTABLES)
+DECL_KSYM_MODULE(ip_tables);
+DECL_KSYM_MODULE(iptable_filter);
+DECL_KSYM_MODULE(iptable_mangle);
+DECL_KSYM_MODULE(ipt_limit);
+DECL_KSYM_MODULE(ipt_multiport);
+DECL_KSYM_MODULE(ipt_tos);
+DECL_KSYM_MODULE(ipt_TOS);
+DECL_KSYM_MODULE(ipt_REJECT);
+DECL_KSYM_MODULE(ipt_TCPMSS);
+DECL_KSYM_MODULE(ipt_tcpmss);
+DECL_KSYM_MODULE(ipt_ttl);
+DECL_KSYM_MODULE(ipt_LOG);
+DECL_KSYM_MODULE(ipt_length);
+DECL_KSYM_MODULE(ip_conntrack);
+DECL_KSYM_MODULE(ip_conntrack_ftp);
+DECL_KSYM_MODULE(ip_conntrack_irc);
+DECL_KSYM_MODULE(ipt_conntrack);
+DECL_KSYM_MODULE(ipt_state);
+DECL_KSYM_MODULE(ipt_helper);
+DECL_KSYM_MODULE(iptable_nat);
+DECL_KSYM_MODULE(ip_nat_ftp);
+DECL_KSYM_MODULE(ip_nat_irc);
+DECL_KSYM_MODULE(ipt_REDIRECT);
+DECL_KSYM_MODULE(ipt_owner);
+
+struct sk_buff;
+
+DECL_KSYM_CALL(int, init_netfilter, (void));
+DECL_KSYM_CALL(int, init_iptables, (void));
+DECL_KSYM_CALL(int, init_iptable_filter, (void));
+DECL_KSYM_CALL(int, init_iptable_mangle, (void));
+DECL_KSYM_CALL(int, init_iptable_limit, (void));
+DECL_KSYM_CALL(int, init_iptable_multiport, (void));
+DECL_KSYM_CALL(int, init_iptable_tos, (void));
+DECL_KSYM_CALL(int, init_iptable_TOS, (void));
+DECL_KSYM_CALL(int, init_iptable_REJECT, (void));
+DECL_KSYM_CALL(int, init_iptable_TCPMSS, (void));
+DECL_KSYM_CALL(int, init_iptable_tcpmss, (void));
+DECL_KSYM_CALL(int, init_iptable_ttl, (void));
+DECL_KSYM_CALL(int, init_iptable_LOG, (void));
+DECL_KSYM_CALL(int, init_iptable_length, (void));
+DECL_KSYM_CALL(int, init_iptable_conntrack, (void));
+DECL_KSYM_CALL(int, init_iptable_ftp, (void));
+DECL_KSYM_CALL(int, init_iptable_irc, (void));
+DECL_KSYM_CALL(int, init_iptable_conntrack_match, (void));
+DECL_KSYM_CALL(int, init_iptable_state, (void));
+DECL_KSYM_CALL(int, init_iptable_helper, (void));
+DECL_KSYM_CALL(int, init_iptable_nat, (void));
+DECL_KSYM_CALL(int, init_iptable_nat_ftp, (void));
+DECL_KSYM_CALL(int, init_iptable_nat_irc, (void));
+DECL_KSYM_CALL(int, init_iptable_REDIRECT, (void));
+DECL_KSYM_CALL(int, init_iptable_owner, (void));
+DECL_KSYM_CALL(void, fini_iptable_owner, (void));
+DECL_KSYM_CALL(void, fini_iptable_nat_irc, (void));
+DECL_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
+DECL_KSYM_CALL(void, fini_iptable_nat, (void));
+DECL_KSYM_CALL(void, fini_iptable_helper, (void));
+DECL_KSYM_CALL(void, fini_iptable_state, (void));
+DECL_KSYM_CALL(void, fini_iptable_conntrack_match, (void));
+DECL_KSYM_CALL(void, fini_iptable_irc, (void));
+DECL_KSYM_CALL(void, fini_iptable_ftp, (void));
+DECL_KSYM_CALL(void, fini_iptable_conntrack, (void));
+DECL_KSYM_CALL(void, fini_iptable_length, (void));
+DECL_KSYM_CALL(void, fini_iptable_LOG, (void));
+DECL_KSYM_CALL(void, fini_iptable_ttl, (void));
+DECL_KSYM_CALL(void, fini_iptable_tcpmss, (void));
+DECL_KSYM_CALL(void, fini_iptable_TCPMSS, (void));
+DECL_KSYM_CALL(void, fini_iptable_REJECT, (void));
+DECL_KSYM_CALL(void, fini_iptable_TOS, (void));
+DECL_KSYM_CALL(void, fini_iptable_tos, (void));
+DECL_KSYM_CALL(void, fini_iptable_multiport, (void));
+DECL_KSYM_CALL(void, fini_iptable_limit, (void));
+DECL_KSYM_CALL(void, fini_iptable_filter, (void));
+DECL_KSYM_CALL(void, fini_iptable_mangle, (void));
+DECL_KSYM_CALL(void, fini_iptables, (void));
+DECL_KSYM_CALL(void, fini_netfilter, (void));
+DECL_KSYM_CALL(void, fini_iptable_REDIRECT, (void));
+
+DECL_KSYM_CALL(void, ipt_flush_table, (struct ipt_table *table));
+#endif /* CONFIG_VE_IPTABLES */
+
+#if defined(CONFIG_VE_ETHDEV) || defined(CONFIG_VE_ETHDEV_MODULE)
+DECL_KSYM_MODULE(vzethdev);
+DECL_KSYM_CALL(int, veth_open, (struct net_device *dev));
+#endif
+
+#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE)
+DECL_KSYM_MODULE(vzmon);
+DECL_KSYM_CALL(int, real_get_device_perms_ve,
+	(int dev_type, dev_t dev, int access_mode));
+DECL_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env));
+DECL_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
+DECL_KSYM_CALL(void, real_update_load_avg_ve, (void));
+#endif
+
+#endif /* _LINUX_NFCALLS_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/nfs_fs.h linux-2.6.9-ve023stab054/include/linux/nfs_fs.h
--- linux-2.6.9-100.orig/include/linux/nfs_fs.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/nfs_fs.h	2011-06-15 19:26:19.000000000 +0400
@@ -328,7 +328,8 @@ extern int nfs_refresh_inode(struct inod
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int nfs_getattr64(struct vfsmount *, struct dentry *, struct kstat64 *);
-extern int nfs_permission(struct inode *, int, struct nameidata *);
+extern int nfs_permission(struct inode *, int, struct nameidata *,
+			  struct exec_perm *);
 extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *);
 extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
 extern void nfs_access_zap_cache(struct inode *inode);
diff -Nurap linux-2.6.9-100.orig/include/linux/notifier.h linux-2.6.9-ve023stab054/include/linux/notifier.h
--- linux-2.6.9-100.orig/include/linux/notifier.h	2011-06-09 19:22:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/notifier.h	2011-06-15 19:26:19.000000000 +0400
@@ -27,8 +27,9 @@ extern int notifier_call_chain(struct no
 
 #define NOTIFY_DONE		0x0000		/* Don't care */
 #define NOTIFY_OK		0x0001		/* Suits me */
+#define NOTIFY_FAIL		0x0002		/* Reject */
 #define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
-#define NOTIFY_BAD		(NOTIFY_STOP_MASK|0x0002)	/* Bad/Veto action	*/
+#define NOTIFY_BAD		(NOTIFY_STOP_MASK|NOTIFY_FAIL)	/* Bad/Veto action	*/
 /*
  * Clean way to return from the notifier and stop further calls.
  */
diff -Nurap linux-2.6.9-100.orig/include/linux/page-flags.h linux-2.6.9-ve023stab054/include/linux/page-flags.h
--- linux-2.6.9-100.orig/include/linux/page-flags.h	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/page-flags.h	2011-06-15 19:26:21.000000000 +0400
@@ -76,6 +76,8 @@
 #define PG_reclaim		18	/* To be reclaimed asap */
 
 
+#define PG_checkpointed		21	/* Page transferred */
+
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
  * allowed.
diff -Nurap linux-2.6.9-100.orig/include/linux/pid.h linux-2.6.9-ve023stab054/include/linux/pid.h
--- linux-2.6.9-100.orig/include/linux/pid.h	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/pid.h	2011-06-15 19:26:19.000000000 +0400
@@ -1,6 +1,18 @@
 #ifndef _LINUX_PID_H
 #define _LINUX_PID_H
 
+#define VPID_BIT	10
+#define VPID_DIV	(1<<VPID_BIT)
+
+#ifdef CONFIG_VE
+#define __is_virtual_pid(pid)	((pid) & VPID_DIV)
+#define is_virtual_pid(pid)	\
+   (__is_virtual_pid(pid) || ((pid)==1 && !ve_is_super(get_exec_env())))
+#else
+#define __is_virtual_pid(pid)	0
+#define is_virtual_pid(pid)	0
+#endif
+
 enum pid_type
 {
 	PIDTYPE_PID,
@@ -15,6 +27,9 @@ struct pid
 	/* Try to keep pid_chain in the same cacheline as nr for find_pid */
 	int nr;
 	struct hlist_node pid_chain;
+#ifdef CONFIG_VE
+	int vnr;
+#endif
 	/* list of pids with the same nr, only one of them is in the hash */
 	struct list_head pid_list;
 };
@@ -36,24 +51,95 @@ extern void FASTCALL(detach_pid(struct t
  */
 extern struct pid *FASTCALL(find_pid(enum pid_type, int));
 
-extern struct pid *find_ge_pid(int nr);
-
 extern int alloc_pidmap(void);
 extern void FASTCALL(free_pidmap(int));
 extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread);
 
-#define do_each_task_pid(who, type, task)				\
-	if ((task = find_task_by_pid_type(type, who))) {		\
+#ifndef CONFIG_VE
+
+#define vpid_to_pid(pid)	(pid)
+#define __vpid_to_pid(pid)	(pid)
+#define pid_type_to_vpid(pid, type)	(pid)
+#define __pid_type_to_vpid(pid, type)	(pid)
+
+#define comb_vpid_to_pid(pid)	(pid)
+#define comb_pid_to_vpid(pid)	(pid)
+
+#else
+
+struct ve_struct;
+extern void free_vpid(int vpid, struct ve_struct *ve);
+extern int alloc_vpid(int pid, int vpid);
+extern int vpid_to_pid(int pid);
+extern int __vpid_to_pid(int pid);
+extern pid_t pid_type_to_vpid(int type, pid_t pid);
+extern pid_t _pid_type_to_vpid(int type, pid_t pid);
+
+static inline int comb_vpid_to_pid(int vpid)
+{
+	int pid = vpid;
+
+	if (vpid > 0) {
+		pid = vpid_to_pid(vpid);
+		if (unlikely(pid < 0))
+			return 0;
+	} else if (vpid < 0) {
+		pid = vpid_to_pid(-vpid);
+		if (unlikely(pid < 0))
+			return 0;
+		pid = -pid;
+	}
+	return pid;
+}
+
+static inline int comb_pid_to_vpid(int pid)
+{
+	int vpid = pid;
+
+	if (pid > 0) {
+		vpid = pid_type_to_vpid(PIDTYPE_PID, pid);
+		if (unlikely(vpid < 0))
+			return 0;
+	} else if (pid < 0) {
+		vpid = pid_type_to_vpid(PIDTYPE_PGID, -pid);
+		if (unlikely(vpid < 0))
+			return 0;
+		vpid = -vpid;
+	}
+	return vpid;
+}
+#endif
+
+#define do_each_task_pid_all(who, type, task)				\
+	if ((task = find_task_by_pid_type_all(type, who))) {		\
 		prefetch((task)->pids[type].pid_list.next);		\
 		do {
 
-#define while_each_task_pid(who, type, task)				\
+#define while_each_task_pid_all(who, type, task)			\
 		} while (task = pid_task((task)->pids[type].pid_list.next,\
 						type),			\
 			prefetch((task)->pids[type].pid_list.next),	\
 			hlist_unhashed(&(task)->pids[type].pid_chain));	\
 	}								\
 
+#ifndef CONFIG_VE
+#define __do_each_task_pid_ve(who, type, task, owner)			\
+		do_each_task_pid_all(who, type, task)
+#define __while_each_task_pid_ve(who, type, task, owner)		\
+		while_each_task_pid_all(who, type, task)
+#else /* CONFIG_VE */
+#define __do_each_task_pid_ve(who, type, task, owner)			\
+		do_each_task_pid_all(who, type, task)			\
+			if (ve_accessible(VE_TASK_INFO(task)->owner_env, owner))
+#define __while_each_task_pid_ve(who, type, task, owner)		\
+		while_each_task_pid_all(who, type, task)
+#endif /* CONFIG_VE */
+
+#define do_each_task_pid_ve(who, type, task)				\
+		__do_each_task_pid_ve(who, type, task, get_exec_env());
+#define while_each_task_pid_ve(who, type, task)				\
+		__while_each_task_pid_ve(who, type, task, get_exec_env());
+
 extern int pid_alive(struct task_struct *p);
 
 #endif /* _LINUX_PID_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/proc_fs.h linux-2.6.9-ve023stab054/include/linux/proc_fs.h
--- linux-2.6.9-100.orig/include/linux/proc_fs.h	2011-06-09 19:22:59.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/proc_fs.h	2011-06-15 19:26:20.000000000 +0400
@@ -5,6 +5,7 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/spinlock.h>
+#include <linux/smp_lock.h>
 #include <asm/atomic.h>
 
 /*
@@ -96,8 +97,8 @@ extern void proc_misc_init(void);
 struct mm_struct;
 
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
-struct dentry *proc_pid_unhash(struct task_struct *p);
-void proc_pid_flush(struct dentry *proc_dentry);
+void proc_pid_unhash(struct task_struct *p, struct dentry * [2]);
+void proc_pid_flush(struct dentry *proc_dentry[2]);
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
 unsigned long task_vsize(struct mm_struct *);
 int task_statm(struct mm_struct *, int *, int *, int *, int *);
@@ -105,10 +106,14 @@ char *task_mem(struct mm_struct *, char 
 
 extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 						struct proc_dir_entry *parent);
+extern struct proc_dir_entry *create_proc_glob_entry(const char *name,
+						mode_t mode,
+						struct proc_dir_entry *parent);
 struct proc_dir_entry *proc_create(const char *name, mode_t mode,
 				struct proc_dir_entry *parent,
 				const struct file_operations *proc_fops);
 extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
+extern void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent);
 
 extern struct vfsmount *proc_mnt;
 extern int proc_fill_super(struct super_block *,void *,int);
@@ -211,6 +216,15 @@ static inline struct proc_dir_entry *pro
 	return newf;
 }
 
+static inline struct proc_dir_entry *__proc_net_fops_create(const char *name,
+	mode_t mode, struct file_operations *fops, struct proc_dir_entry *p)
+{
+	struct proc_dir_entry *res = create_proc_entry(name, mode, p);
+	if (res)
+		res->proc_fops = fops;
+	return res;
+}
+
 static inline struct proc_dir_entry *proc_net_fops_create(const char *name,
 	mode_t mode, struct file_operations *fops)
 {
@@ -220,6 +234,11 @@ static inline struct proc_dir_entry *pro
 	return res;
 }
 
+static inline void __proc_net_remove(const char *name)
+{
+	remove_proc_entry(name, NULL);
+}
+
 static inline void proc_net_remove(const char *name)
 {
 	remove_proc_entry(name,proc_net);
@@ -230,15 +249,20 @@ static inline void proc_net_remove(const
 #define proc_root_driver NULL
 #define proc_net NULL
 
+#define __proc_net_fops_create(name, mode, fops, p) ({ (void)(mode), NULL; })
 #define proc_net_fops_create(name, mode, fops)  ({ (void)(mode), NULL; })
 #define proc_net_create(name, mode, info)	({ (void)(mode), NULL; })
+static inline void __proc_net_remove(const char *name) {}
 static inline void proc_net_remove(const char *name) {}
 
-static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; }
-static inline void proc_pid_flush(struct dentry *proc_dentry) { }
+static inline void proc_pid_unhash(struct task_struct *p, struct dentry *d[2])
+	{ }
+static inline void proc_pid_flush(struct dentry *proc_dentry[2]) { }
 
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
 	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
+static inline struct proc_dir_entry *create_proc_glob_entry(const char *name,
+	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
 static inline struct proc_dir_entry *proc_create(const char *name,
 	mode_t mode, struct proc_dir_entry *parent,
 	const struct file_operations *proc_fops)
@@ -267,6 +291,48 @@ extern struct proc_dir_entry proc_root;
 
 #endif /* CONFIG_PROC_FS */
 
+static inline struct proc_dir_entry *create_proc_entry_mod(const char *name,
+					mode_t mode,
+					struct proc_dir_entry *parent,
+					struct module *owner)
+{
+	struct proc_dir_entry *ent;
+
+	/*
+	 * lock_kernel() here protects against proc_lookup()
+	 * which can find this freshly created entry w/o owner being set.
+	 * this can lead to the module being put more times than it was got.
+	 */
+	lock_kernel();
+	ent = create_proc_entry(name, mode, parent);
+	if (ent)
+		ent->owner = owner;
+	unlock_kernel();
+
+	return ent;
+}
+
+static inline struct proc_dir_entry *create_proc_glob_entry_mod(const char *name, 
+					mode_t mode,
+					struct proc_dir_entry *parent,
+					struct module *owner)
+{
+	struct proc_dir_entry *ent;
+
+	/*
+	 * lock_kernel() here protects against proc_lookup()
+	 * which can find this freshly created entry w/o owner being set.
+	 * this can lead to the module being put more times than it was got.
+	 */
+	lock_kernel();
+	ent = create_proc_glob_entry(name, mode, parent);
+	if (ent)
+		ent->owner = owner;
+	unlock_kernel();
+
+	return ent;
+}
+
 #if !defined(CONFIG_PROC_FS)
 static inline void kclist_add(struct kcore_list *new, void *addr, size_t size)
 {
@@ -318,4 +384,18 @@ struct proc_maps_private {
 	struct vm_area_struct *tail_vma;
 };
 
+static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
+{
+	if (de)
+		atomic_inc(&de->count);
+	return de;
+}
+
+extern void de_put(struct proc_dir_entry *de);
+
+#define LPDE(inode)	(PROC_I((inode))->pde)
+#ifdef CONFIG_VE
+#define GPDE(inode)	(*(struct proc_dir_entry **)(&(inode)->i_pipe))
+#endif
+
 #endif /* _LINUX_PROC_FS_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/quota.h linux-2.6.9-ve023stab054/include/linux/quota.h
--- linux-2.6.9-100.orig/include/linux/quota.h	2004-10-19 01:54:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/quota.h	2011-06-15 19:26:20.000000000 +0400
@@ -37,7 +37,6 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <linux/spinlock.h>
 
 #define __DQUOT_VERSION__	"dquot_6.5.1"
 #define __DQUOT_NUM_VERSION__	6*10000+5*100+1
@@ -45,8 +44,6 @@
 typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
 typedef __u64 qsize_t;          /* Type in which we store sizes */
 
-extern spinlock_t dq_data_lock;
-
 /* Size of blocks in which are counted size limits */
 #define QUOTABLOCK_BITS 10
 #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
@@ -133,10 +130,14 @@ struct if_dqinfo {
 
 #ifdef __KERNEL__
 
+#include <linux/spinlock.h>
+
 #include <linux/dqblk_xfs.h>
 #include <linux/dqblk_v1.h>
 #include <linux/dqblk_v2.h>
 
+extern spinlock_t dq_data_lock;
+
 /* Maximal numbers of writes for quota operation (insert/delete/update)
  * (over all formats) - info block, 4 pointer blocks, data block */
 #define DQUOT_MAX_WRITES	6
@@ -239,6 +240,8 @@ struct quota_format_ops {
 	int (*release_dqblk)(struct dquot *dquot);	/* Called when last reference to dquot is being dropped */
 };
 
+struct inode;
+struct iattr;
 /* Operations working with dquots */
 struct dquot_operations {
 	int (*initialize) (struct inode *, int);
@@ -253,9 +256,11 @@ struct dquot_operations {
 	int (*release_dquot) (struct dquot *);		/* Quota is going to be deleted from disk */
 	int (*mark_dirty) (struct dquot *);		/* Dquot is marked dirty */
 	int (*write_info) (struct super_block *, int);	/* Write of quota "superblock" */
+	int (*rename) (struct inode *, struct inode *, struct inode *);
 };
 
 /* Operations handling requests from userspace */
+struct v2_disk_dqblk;
 struct quotactl_ops {
 	int (*quota_on)(struct super_block *, int, int, char *);
 	int (*quota_off)(struct super_block *, int);
@@ -268,6 +273,9 @@ struct quotactl_ops {
 	int (*set_xstate)(struct super_block *, unsigned int, int);
 	int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 	int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
+#ifdef CONFIG_QUOTA_COMPAT
+	int (*get_quoti)(struct super_block *, int, unsigned int, struct v2_disk_dqblk *);
+#endif
 };
 
 struct quota_format_type {
@@ -303,6 +311,8 @@ int mark_dquot_dirty(struct dquot *dquot
 
 int register_quota_format(struct quota_format_type *fmt);
 void unregister_quota_format(struct quota_format_type *fmt);
+long sys32compat_quotactl(unsigned int cmd, const char __user *special,
+					qid_t id, void __user *addr);
 
 struct quota_module_name {
 	int qm_fmt_id;
diff -Nurap linux-2.6.9-100.orig/include/linux/quotaops.h linux-2.6.9-ve023stab054/include/linux/quotaops.h
--- linux-2.6.9-100.orig/include/linux/quotaops.h	2004-10-19 01:53:45.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/quotaops.h	2011-06-15 19:26:20.000000000 +0400
@@ -170,6 +170,19 @@ static __inline__ int DQUOT_TRANSFER(str
 	return 0;
 }
 
+static __inline__ int DQUOT_RENAME(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir)
+{
+	struct dquot_operations *q_op;
+
+	q_op = inode->i_sb->dq_op;
+	if (q_op && q_op->rename) {
+		if (q_op->rename(inode, old_dir, new_dir) == NO_QUOTA)
+			return 1;
+	}
+	return 0;
+}
+
 /* The following two functions cannot be called inside a transaction */
 #define DQUOT_SYNC(sb)	sync_dquots(sb, -1)
 
@@ -197,6 +210,7 @@ static __inline__ int DQUOT_OFF(struct s
 #define DQUOT_SYNC(sb)				do { } while(0)
 #define DQUOT_OFF(sb)				do { } while(0)
 #define DQUOT_TRANSFER(inode, iattr)		(0)
+#define DQUOT_RENAME(inode, old_dir, new_dir)	(0)
 extern __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 {
 	inode_add_bytes(inode, nr);
diff -Nurap linux-2.6.9-100.orig/include/linux/reiserfs_xattr.h linux-2.6.9-ve023stab054/include/linux/reiserfs_xattr.h
--- linux-2.6.9-100.orig/include/linux/reiserfs_xattr.h	2004-10-19 01:55:36.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/reiserfs_xattr.h	2011-06-15 19:26:19.000000000 +0400
@@ -42,7 +42,8 @@ int reiserfs_removexattr (struct dentry 
 int reiserfs_delete_xattrs (struct inode *inode);
 int reiserfs_chown_xattrs (struct inode *inode, struct iattr *attrs);
 int reiserfs_xattr_init (struct super_block *sb, int mount_flags);
-int reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd);
+int reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd,
+		struct exec_perm *exec_perm);
 int reiserfs_permission_locked (struct inode *inode, int mask, struct nameidata *nd);
 
 int reiserfs_xattr_del (struct inode *, const char *);
diff -Nurap linux-2.6.9-100.orig/include/linux/rmap.h linux-2.6.9-ve023stab054/include/linux/rmap.h
--- linux-2.6.9-100.orig/include/linux/rmap.h	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/rmap.h	2011-06-15 19:26:22.000000000 +0400
@@ -71,7 +71,7 @@ void __anon_vma_link(struct vm_area_stru
  */
 void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
-void page_remove_rmap(struct page *);
+void page_remove_rmap(struct page *, struct vm_area_struct *vma);
 
 /**
  * page_dup_rmap - duplicate pte mapping to a page
diff -Nurap linux-2.6.9-100.orig/include/linux/rtnetlink.h linux-2.6.9-ve023stab054/include/linux/rtnetlink.h
--- linux-2.6.9-100.orig/include/linux/rtnetlink.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/rtnetlink.h	2011-06-15 19:26:18.000000000 +0400
@@ -807,7 +807,8 @@ extern struct semaphore rtnl_sem;
 #define rtnl_shlock()		down(&rtnl_sem)
 #define rtnl_shlock_nowait()	down_trylock(&rtnl_sem)
 
-#define rtnl_shunlock()	do { up(&rtnl_sem); \
+#define __rtnl_shunlock()	up(&rtnl_sem)
+#define rtnl_shunlock()	do { __rtnl_shunlock(); \
 		             if (rtnl && rtnl->sk_receive_queue.qlen) \
 				     rtnl->sk_data_ready(rtnl, 0); \
 		        } while(0)
diff -Nurap linux-2.6.9-100.orig/include/linux/sched.h linux-2.6.9-ve023stab054/include/linux/sched.h
--- linux-2.6.9-100.orig/include/linux/sched.h	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/sched.h	2011-06-15 19:26:22.000000000 +0400
@@ -30,7 +30,12 @@
 #include <linux/pid.h>
 #include <linux/percpu.h>
 
+#include <ub/ub_task.h>
+
 struct exec_domain;
+struct task_beancounter;
+struct user_beancounter;
+struct ve_struct;
 extern int exec_shield;
 extern int exec_shield_randomize;
 extern int print_fatal_signals;
@@ -87,6 +92,9 @@ extern unsigned long avenrun[];		/* Load
 	load += n*(FIXED_1-exp); \
 	load >>= FSHIFT;
 
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
 #define CT_TO_SECS(x)	((x) / HZ)
 #define CT_TO_USECS(x)	(((x) % HZ) * 1000000/HZ)
 
@@ -94,10 +102,22 @@ extern int nr_threads;
 extern int last_pid;
 DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
+
+extern unsigned long nr_sleeping(void);
+extern unsigned long nr_stopped(void);
+extern unsigned long nr_zombie;
+extern atomic_t nr_dead;
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_iowait(void);
 
+#ifdef CONFIG_VE
+struct ve_struct;
+extern unsigned long nr_running_ve(struct ve_struct *);
+extern unsigned long nr_iowait_ve(void);
+extern unsigned long nr_uninterruptible_ve(struct ve_struct *);
+#endif
+
 #include <linux/time.h>
 #include <linux/param.h>
 #include <linux/resource.h>
@@ -153,11 +173,16 @@ typedef struct task_struct task_t;
 extern void sched_init(void);
 extern void sched_init_smp(void);
 extern void init_idle(task_t *idle, int cpu);
+extern void fini_idle(int cpu);
 
 extern cpumask_t nohz_cpu_mask;
 
 extern void show_state(void);
 extern void show_regs(struct pt_regs *);
+extern void smp_show_regs(struct pt_regs *, void *);
+extern void show_vsched(void);
+extern int vsched_init_default(int cpu);
+extern void vsched_fini_default(int cpu);
 
 /*
  * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
@@ -175,6 +200,8 @@ extern void update_process_times(int use
 extern void scheduler_tick(int user_tick, int system);
 extern unsigned long cache_decay_ticks;
 
+int setscheduler(pid_t pid, int policy, struct sched_param __user *param);
+
 /* Attach to any functions which should be ignored in wchan output. */
 #define __sched		__attribute__((__section__(".sched.text")))
 /* Is this address in the __sched functions? */
@@ -259,6 +286,8 @@ struct mm_struct {
 	unsigned long saved_auxv[42]; /* for /proc/PID/auxv */
 
 	unsigned dumpable:2;
+	unsigned vps_dumpable:2;
+	unsigned oom_killed:1;
 	cpumask_t cpu_vm_mask;
 
 	/* Architecture-specific MM context */
@@ -277,8 +306,12 @@ struct mm_struct {
 	struct kioctx		*ioctx_list;
 
 	struct kioctx		default_kioctx;
+
+	struct user_beancounter	*mm_ub;
 };
 
+#define mm_ub(__mm)	((__mm)->mm_ub)
+
 extern int mmlist_nr;
 
 struct sighand_struct {
@@ -287,6 +320,9 @@ struct sighand_struct {
 	spinlock_t		siglock;
 };
 
+#include <linux/ve.h>
+#include <linux/ve_task.h>
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -480,6 +516,8 @@ extern int groups_search(struct group_in
 
 struct audit_context;		/* See audit.c */
 struct mempolicy;
+struct vcpu_scheduler;
+struct vcpu_info;
 
 /* auxilliary task structure to avoid KABI breakage */
 struct task_struct_aux {
@@ -504,6 +542,14 @@ struct task_struct {
 
 	int lock_depth;		/* Lock depth */
 
+#ifdef CONFIG_SCHED_VCPU
+	struct vcpu_scheduler *vsched;
+	struct vcpu_info *vcpu;
+
+	/* id's are saved to avoid locking (e.g. on vsched->id access) */
+	int vsched_id;
+	int vcpu_id;
+#endif
 	int prio, static_prio;
 	struct list_head run_list;
 	prio_array_t *array;
@@ -522,6 +568,7 @@ struct task_struct {
 #endif
 
 	struct list_head tasks;
+
 	/*
 	 * ptrace_list/ptrace_children forms the list of my children
 	 * that were stolen by a ptracer.
@@ -644,6 +691,11 @@ struct task_struct {
 
 	unsigned long ptrace_message;
 	siginfo_t *last_siginfo; /* For ptrace use.  */
+/* state tracking for suspend */
+	sigset_t saved_sigset;
+	__u8	 pn_state;
+	__u8	 stopped_state:1, sigsuspend_state:1;
+
 /*
  * current io wait handle: wait queue entry to use for io waits
  * If this thread is processing aio, this points at the waitqueue
@@ -655,6 +707,16 @@ struct task_struct {
   	struct mempolicy *mempolicy;
   	short il_next;		/* could be shared with used_math */
 #endif
+#ifdef CONFIG_USER_RESOURCE
+	struct task_beancounter	task_bc;
+#endif
+#ifdef CONFIG_VE
+	struct ve_task_info ve_task_info;
+#endif
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+	unsigned long	magic;
+	struct inode	*ino;
+#endif
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
@@ -681,10 +743,8 @@ do { if (atomic_dec_and_test(&(tsk)->usa
 #define PF_DUMPCORE	0x00000200	/* dumped core */
 #define PF_SIGNALED	0x00000400	/* killed by a signal */
 #define PF_MEMALLOC	0x00000800	/* Allocating memory */
-#define PF_MEMDIE	0x00001000	/* Killed for out-of-memory */
 #define PF_FLUSHER	0x00002000	/* responsible for disk writeback */
 
-#define PF_FREEZE	0x00004000	/* this task should be frozen for suspend */
 #define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
@@ -695,6 +755,71 @@ do { if (atomic_dec_and_test(&(tsk)->usa
 #define PF_RELOCEXEC	0x00400000	/* relocate shared libraries */
 
 
+static inline unsigned long cycles_to_clocks(cycles_t cycles)
+{
+	extern unsigned long cycles_per_clock;
+	do_div(cycles, cycles_per_clock);
+	return cycles;
+}
+
+static inline u64 cycles_to_jiffies(cycles_t cycles)
+{
+	extern unsigned long cycles_per_jiffy;
+	do_div(cycles, cycles_per_jiffy);
+	return cycles;
+}
+
+#ifndef CONFIG_VE
+#define set_pn_state(tsk, state)	do { } while(0)
+#define clear_pn_state(tsk)		do { } while(0)
+#define set_sigsuspend_state(tsk, sig)	do { } while(0)
+#define clear_sigsuspend_state(tsk)	do { } while(0)
+#define set_stop_state(tsk)		do { } while(0)
+#define clear_stop_state(tsk)		do { } while(0)
+#else
+#define PN_STOP_TF	1	/* was not in 2.6.8 */
+#define PN_STOP_TF_RT	2	/* was not in 2.6.8 */ 
+#define PN_STOP_ENTRY	3
+#define PN_STOP_FORK	4
+#define PN_STOP_VFORK	5
+#define PN_STOP_SIGNAL	6
+#define PN_STOP_EXIT	7
+#define PN_STOP_EXEC	8
+#define PN_STOP_LEAVE	9
+
+static inline void set_pn_state(struct task_struct *tsk, int state)
+{
+	tsk->pn_state = state;
+}
+
+static inline void clear_pn_state(struct task_struct *tsk)
+{
+	tsk->pn_state = 0;
+}
+
+static inline void set_sigsuspend_state(struct task_struct *tsk, sigset_t saveset)
+{
+	tsk->sigsuspend_state = 1;
+	tsk->saved_sigset = saveset;
+}
+
+static inline void clear_sigsuspend_state(struct task_struct *tsk)
+{
+	tsk->sigsuspend_state = 0;
+	siginitset(&tsk->saved_sigset, 0);
+}
+
+static inline void set_stop_state(struct task_struct *tsk)
+{
+	tsk->stopped_state = 1;
+}
+
+static inline void clear_stop_state(struct task_struct *tsk)
+{
+	tsk->stopped_state = 0;
+}
+#endif
+
 #ifdef CONFIG_SMP
 extern int set_cpus_allowed(task_t *p, cpumask_t new_mask);
 #else
@@ -748,12 +873,243 @@ extern struct task_struct init_task;
 
 extern struct   mm_struct init_mm;
 
-#define find_task_by_pid(nr)	find_task_by_pid_type(PIDTYPE_PID, nr)
-extern struct task_struct *find_task_by_pid_type(int type, int pid);
+#define find_task_by_pid_all(nr)	\
+		find_task_by_pid_type_all(PIDTYPE_PID, nr)
+extern struct task_struct *find_task_by_pid_type_all(int type, int pid);
 extern void set_special_pids(pid_t session, pid_t pgrp);
 extern void __set_special_pids(pid_t session, pid_t pgrp);
 
+#ifndef CONFIG_VE
+#define find_task_by_pid_ve find_task_by_pid_all
+
+#define get_exec_env()	NULL
+static inline struct ve_struct * set_exec_env(struct ve_struct *new_env)
+{
+	return NULL;
+}
+#define ve_is_super(env)			1
+#define ve_accessible(target, owner)		1
+#define ve_accessible_strict(target, owner)	1
+#define ve_accessible_veid(target, owner)		1
+#define ve_accessible_strict_veid(target, owner)	1
+
+#define VEID(envid)				0
+#define get_ve0() NULL
+
+static inline pid_t virt_pid(struct task_struct *tsk)
+{
+	return tsk->pid;
+}
+
+static inline pid_t virt_tgid(struct task_struct *tsk)
+{
+	return tsk->tgid;
+}
+
+static inline pid_t virt_pgid(struct task_struct *tsk)
+{
+	return tsk->signal->pgrp;
+}
+
+static inline pid_t virt_sid(struct task_struct *tsk)
+{
+	return tsk->signal->session;
+}
+
+static inline pid_t get_task_pid_ve(struct task_struct *tsk, struct ve_struct *ve)
+{
+	return tsk->pid;
+}
+
+static inline pid_t get_task_pid(struct task_struct *tsk)
+{
+	return tsk->pid;
+}
+
+static inline pid_t get_task_tgid(struct task_struct *tsk)
+{
+	return tsk->tgid;
+}
+
+static inline pid_t get_task_pgid(struct task_struct *tsk)
+{
+	return tsk->signal->pgrp;
+}
+
+static inline pid_t get_task_sid(struct task_struct *tsk)
+{
+	return tsk->signal->session;
+}
+
+static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline pid_t get_task_ppid(struct task_struct *p)
+{
+	if (!pid_alive(p))
+		return 0;
+	return (p->pid > 1 ? p->group_leader->real_parent->pid : 0);
+}
+
+#else	/* CONFIG_VE */
+
+#include <asm/current.h>
+#include <linux/ve.h>
+
+extern struct ve_struct ve0;
+
+#define find_task_by_pid_ve(nr)	\
+		find_task_by_pid_type_ve(PIDTYPE_PID, nr)
+
+extern struct task_struct *find_task_by_pid_type_ve(int type, int pid);
+
+#define get_ve0()	(&ve0)
+#define VEID(envid)	((envid)->veid)
+
+#define get_exec_env()	(VE_TASK_INFO(current)->exec_env)
+static inline struct ve_struct *set_exec_env(struct ve_struct *new_env)
+{
+	struct ve_struct *old_env;
+
+	old_env = VE_TASK_INFO(current)->exec_env;
+	VE_TASK_INFO(current)->exec_env = new_env;
+
+	return old_env;
+}
+
+#define ve_is_super(env) ((env) == get_ve0())
+#define ve_accessible_strict(target, owner)	((target) == (owner))
+static inline int ve_accessible(struct ve_struct *target,
+				struct ve_struct *owner) {
+	return ve_is_super(owner) || ve_accessible_strict(target, owner);
+}
+
+#define ve_accessible_strict_veid(target, owner) ((target) == (owner))
+static inline int ve_accessible_veid(envid_t target, envid_t owner)
+{
+	return get_ve0()->veid == owner ||
+	       ve_accessible_strict_veid(target, owner);
+}
+
+static inline pid_t virt_pid(struct task_struct *tsk)
+{
+	return tsk->pids[PIDTYPE_PID].vnr;
+}
+
+static inline pid_t virt_tgid(struct task_struct *tsk)
+{
+	return tsk->pids[PIDTYPE_TGID].vnr;
+}
+
+static inline pid_t virt_pgid(struct task_struct *tsk)
+{
+	return tsk->pids[PIDTYPE_PGID].vnr;
+}
+
+static inline pid_t virt_sid(struct task_struct *tsk)
+{
+	return tsk->pids[PIDTYPE_SID].vnr;
+}
+
+static inline pid_t get_task_pid_ve(struct task_struct *tsk, struct ve_struct *env)
+{
+	return ve_is_super(env) ? tsk->pid : virt_pid(tsk);
+}
+
+static inline pid_t get_task_pid(struct task_struct *tsk)
+{
+	return get_task_pid_ve(tsk, get_exec_env());
+}
+
+static inline pid_t get_task_tgid(struct task_struct *tsk)
+{
+	return ve_is_super(get_exec_env()) ? tsk->tgid : virt_tgid(tsk);
+}
+
+static inline pid_t get_task_pgid(struct task_struct *tsk)
+{
+	return ve_is_super(get_exec_env()) ? tsk->signal->pgrp : virt_pgid(tsk);
+}
+
+static inline pid_t get_task_sid(struct task_struct *tsk)
+{
+	return ve_is_super(get_exec_env()) ? tsk->signal->session : virt_sid(tsk);
+}
+
+static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->pids[PIDTYPE_PID].vnr = pid;
+}
+
+static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->pids[PIDTYPE_TGID].vnr = pid;
+}
+
+static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->pids[PIDTYPE_PGID].vnr = pid;
+}
+
+static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->pids[PIDTYPE_SID].vnr = pid;
+}
+
+static inline pid_t get_task_ppid(struct task_struct *p)
+{
+	struct task_struct *parent;
+	struct ve_struct *env;
+
+	if (!pid_alive(p))
+		return 0;
+	env = get_exec_env();
+	if (get_task_pid_ve(p, env) == 1)
+		return 0;
+	parent = p->group_leader->real_parent;
+	return ve_accessible(VE_TASK_INFO(parent)->owner_env, env) ?
+		get_task_pid_ve(parent, env) : 1;
+}
+
+void ve_sched_get_cpu_stat(struct ve_struct *envid, cycles_t *idle,
+				cycles_t *strv, unsigned int cpu);
+void ve_sched_attach(struct ve_struct *envid);
+
+#endif	/* CONFIG_VE */
+
+#if defined(CONFIG_VE)
+extern cycles_t ve_sched_get_idle_time(struct ve_struct *, int);
+extern cycles_t ve_sched_get_iowait_time(struct ve_struct *, int);
+#else
+#define ve_sched_get_idle_time(ve, cpu)	0
+#define ve_sched_get_iowait_time(ve, cpu)	0
+#endif
+
+#ifdef CONFIG_SCHED_VCPU
+struct vcpu_scheduler;
+extern void fastcall vsched_cpu_online_map(struct vcpu_scheduler *sched,
+		cpumask_t *mask);
+#else
+#define vsched_cpu_online_map(vsched, mask)	do {	\
+			*mask = cpu_online_map;		\
+	} while (0)
+#endif
+
 /* per-UID process charging. */
+extern int set_user(uid_t new_ruid, int dumpclear);
 extern struct user_struct * alloc_uid(uid_t);
 static inline struct user_struct *get_uid(struct user_struct *u)
 {
@@ -769,6 +1125,7 @@ extern unsigned long itimer_ticks;
 extern unsigned long itimer_next;
 extern void do_timer(struct pt_regs *);
 
+extern void wake_up_init(task_t *p);
 extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
 extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
@@ -833,6 +1190,11 @@ extern int do_sigaltstack(const stack_t 
 #define SEND_SIG_PRIV	((struct siginfo *) 1)
 #define SEND_SIG_FORCED	((struct siginfo *) 2)
 
+static inline int is_si_special(const struct siginfo *info)
+{
+	return info <= SEND_SIG_FORCED;
+}
+
 /* True if we are on the alternate signal stack.  */
 
 static inline int on_sig_stack(unsigned long sp)
@@ -907,6 +1269,9 @@ extern task_t *child_reaper;
 extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
 task_t *fork_idle(int);
+extern struct task_struct * copy_process(unsigned long, unsigned long,
+		struct pt_regs *, unsigned long, int __user *, int __user *,
+		long pid, long vpid);
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
 extern void get_task_comm(char *to, struct task_struct *tsk);
@@ -932,22 +1297,98 @@ extern void wait_task_inactive(task_t * 
 	add_parent(p, (p)->parent);				\
 	} while (0)
 
-#define next_task(p)	list_entry((p)->tasks.next, struct task_struct, tasks)
-#define prev_task(p)	list_entry((p)->tasks.prev, struct task_struct, tasks)
+#define next_task_all(p)	list_entry((p)->tasks.next, struct task_struct, tasks)
+#define prev_task_all(p)	list_entry((p)->tasks.prev, struct task_struct, tasks)
 
-#define for_each_process(p) \
-	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
+#define for_each_process_all(p) \
+	for (p = &init_task ; (p = next_task_all(p)) != &init_task ; )
 
 /*
  * Careful: do_each_thread/while_each_thread is a double loop so
  *          'break' will not work as expected - use goto instead.
  */
-#define do_each_thread(g, t) \
-	for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
+#define do_each_thread_all(g, t) \
+	for (g = t = &init_task ; (g = t = next_task_all(g)) != &init_task ; ) do
+
+#define while_each_thread_all(g, t) \
+	while ((t = next_thread(t)) != g)
+
+#ifndef CONFIG_VE
+
+#define SET_VE_LINKS(p)
+#define REMOVE_VE_LINKS(p)
+#define for_each_process_ve(p)		for_each_process_all(p)
+#define do_each_thread_ve(g, t)		do_each_thread_all(g, t)
+#define while_each_thread_ve(g, t)	while_each_thread_all(g, t)
+#define first_task_ve()			next_task_ve(&init_task)
+#define next_task_ve(p)			\
+		(next_task_all(p) != &init_task ? next_task_all(p) : NULL)
+
+#else	/* CONFIG_VE */
+
+#define SET_VE_LINKS(p)							\
+	do {								\
+		if (thread_group_leader(p))				\
+			list_add_tail(&VE_TASK_INFO(p)->vetask_list,	\
+					&VE_TASK_INFO(p)->owner_env->vetask_lh); \
+	} while (0)
+
+#define REMOVE_VE_LINKS(p)						\
+	do {								\
+		if (thread_group_leader(p))				\
+			list_del(&VE_TASK_INFO(p)->vetask_list);	\
+	} while(0)
+
+static inline task_t* __first_task_ve(struct ve_struct *ve)
+{
+	task_t *tsk;
+
+	if (unlikely(ve_is_super(ve))) {
+		tsk = next_task_all(&init_task);
+		if (tsk == &init_task)
+			tsk = NULL;
+	} else {
+		/* probably can return ve->init_entry, but it's more clear */
+		BUG_ON(list_empty(&ve->vetask_lh));
+		tsk = VE_TASK_LIST_2_TASK(ve->vetask_lh.next);
+	}
+	return tsk;
+}
 
-#define while_each_thread(g, t) \
+static inline task_t* __next_task_ve(struct ve_struct *ve, task_t *tsk)
+{
+	if (unlikely(ve_is_super(ve))) {
+		tsk = next_task_all(tsk);
+		if (tsk == &init_task)
+			tsk = NULL;
+	} else {
+		struct list_head *tmp;
+
+		BUG_ON(VE_TASK_INFO(tsk)->owner_env != ve);
+		tmp = VE_TASK_INFO(tsk)->vetask_list.next;
+		if (tmp == &ve->vetask_lh)
+			tsk = NULL;
+		else
+			tsk = VE_TASK_LIST_2_TASK(tmp);
+	}
+	return tsk;
+}
+
+#define first_task_ve()	__first_task_ve(get_exec_env())
+#define next_task_ve(p)	__next_task_ve(get_exec_env(), p)
+/* no one uses prev_task_ve(), copy next_task_ve() if needed */
+
+#define for_each_process_ve(p) \
+	for (p = first_task_ve(); p != NULL ; p = next_task_ve(p))
+
+#define do_each_thread_ve(g, t) \
+	for (g = t = first_task_ve() ; g != NULL; g = t = next_task_ve(g)) do
+
+#define while_each_thread_ve(g, t) \
 	while ((t = next_thread(t)) != g)
 
+#endif	/* CONFIG_VE */
+
 extern task_t * FASTCALL(next_thread(const task_t *p));
 
 #define thread_group_leader(p)	(p->pid == p->tgid)
@@ -1072,28 +1513,61 @@ extern void signal_wake_up(struct task_s
  */
 #ifdef CONFIG_SMP
 
-static inline unsigned int task_cpu(const struct task_struct *p)
+static inline unsigned int task_pcpu(const struct task_struct *p)
 {
 	return p->thread_info->cpu;
 }
 
-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu)
 {
 	p->thread_info->cpu = cpu;
 }
 
 #else
 
+static inline unsigned int task_pcpu(const struct task_struct *p)
+{
+	return 0;
+}
+
+static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu)
+{
+}
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_SCHED_VCPU
+
+static inline unsigned int task_vsched_id(const struct task_struct *p)
+{
+	return p->vsched_id;
+}
+
 static inline unsigned int task_cpu(const struct task_struct *p)
 {
+	return p->vcpu_id;
+}
+
+extern void set_task_cpu(struct task_struct *p, unsigned int vcpu);
+
+#else
+
+static inline unsigned int task_vsched_id(const struct task_struct *p)
+{
 	return 0;
 }
 
+static inline unsigned int task_cpu(const struct task_struct *p)
+{
+	return task_pcpu(p);
+}
+
 static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
+	set_task_pcpu(p, cpu);
 }
 
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_SCHED_VCPU */
 
 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 extern void arch_pick_mmap_layout(struct mm_struct *mm);
diff -Nurap linux-2.6.9-100.orig/include/linux/security.h linux-2.6.9-ve023stab054/include/linux/security.h
--- linux-2.6.9-100.orig/include/linux/security.h	2011-06-09 19:22:48.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/security.h	2011-06-15 19:26:20.000000000 +0400
@@ -62,7 +62,8 @@ static inline int cap_netlink_send (stru
 
 static inline int cap_netlink_recv (struct sk_buff *skb)
 {
-	if (!cap_raised (NETLINK_CB (skb).eff_cap, CAP_NET_ADMIN))
+	if (!cap_raised (NETLINK_CB (skb).eff_cap, CAP_VE_NET_ADMIN) &&
+		!cap_raised (NETLINK_CB (skb).eff_cap, CAP_NET_ADMIN))
 		return -EPERM;
 	return 0;
 }
diff -Nurap linux-2.6.9-100.orig/include/linux/shm.h linux-2.6.9-ve023stab054/include/linux/shm.h
--- linux-2.6.9-100.orig/include/linux/shm.h	2004-10-19 01:54:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/shm.h	2011-06-15 19:26:19.000000000 +0400
@@ -73,6 +73,8 @@ struct shm_info {
 };
 
 #ifdef __KERNEL__
+struct user_beancounter;
+
 struct shmid_kernel /* private to the kernel */
 {	
 	struct kern_ipc_perm	shm_perm;
@@ -86,8 +88,12 @@ struct shmid_kernel /* private to the ke
 	pid_t			shm_cprid;
 	pid_t			shm_lprid;
 	struct user_struct	*mlock_user;
+	struct user_beancounter *shmidk_ub;
+	struct ipc_ids		*_shm_ids;
 };
 
+#define shmid_ub(__shmid) (__shmid)->shmidk_ub
+
 /* shm_mode upper byte flags */
 #define	SHM_DEST	01000	/* segment will be destroyed on last detach */
 #define SHM_LOCKED      02000   /* segment will not be swapped */
diff -Nurap linux-2.6.9-100.orig/include/linux/shmem_fs.h linux-2.6.9-ve023stab054/include/linux/shmem_fs.h
--- linux-2.6.9-100.orig/include/linux/shmem_fs.h	2004-10-19 01:54:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/shmem_fs.h	2011-06-15 19:26:19.000000000 +0400
@@ -8,6 +8,8 @@
 
 #define SHMEM_NR_DIRECT 16
 
+struct user_beancounter;
+
 struct shmem_inode_info {
 	spinlock_t		lock;
 	unsigned long		flags;
@@ -19,8 +21,11 @@ struct shmem_inode_info {
 	swp_entry_t		i_direct[SHMEM_NR_DIRECT]; /* first blocks */
 	struct list_head	swaplist;	/* chain of maybes on swap */
 	struct inode		vfs_inode;
+	struct user_beancounter *info_ub;
 };
 
+#define shm_info_ub(__shmi) (__shmi)->info_ub
+
 struct shmem_sb_info {
 	unsigned long max_blocks;   /* How many blocks are allowed */
 	unsigned long free_blocks;  /* How many are left for allocation */
diff -Nurap linux-2.6.9-100.orig/include/linux/signal.h linux-2.6.9-ve023stab054/include/linux/signal.h
--- linux-2.6.9-100.orig/include/linux/signal.h	2004-10-19 01:53:37.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/signal.h	2011-06-15 19:26:19.000000000 +0400
@@ -14,14 +14,19 @@
  * Real Time signals may be queued.
  */
 
+struct user_beancounter;
+
 struct sigqueue {
 	struct list_head list;
 	spinlock_t *lock;
 	int flags;
 	siginfo_t info;
 	struct user_struct *user;
+	struct user_beancounter *sig_ub;
 };
 
+#define sig_ub(__q) ((__q)->sig_ub)
+
 /* flags values. */
 #define SIGQUEUE_PREALLOC	1
 
diff -Nurap linux-2.6.9-100.orig/include/linux/skbuff.h linux-2.6.9-ve023stab054/include/linux/skbuff.h
--- linux-2.6.9-100.orig/include/linux/skbuff.h	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/skbuff.h	2011-06-15 19:26:19.000000000 +0400
@@ -19,6 +19,7 @@
 #include <linux/compiler.h>
 #include <linux/time.h>
 #include <linux/cache.h>
+#include <linux/ve_owner.h>
 
 #include <asm/atomic.h>
 #include <asm/types.h>
@@ -191,6 +192,8 @@ struct skb_shared_info {
  *	@tc_index: Traffic control index
  */
 
+#include <ub/ub_sk.h>
+
 struct sk_buff {
 	/* These two members must be first. */
 	struct sk_buff		*next;
@@ -291,13 +294,18 @@ struct sk_buff {
 				*data,
 				*tail,
 				*end;
+	struct skb_beancounter	skb_bc;
+	struct ve_struct	*owner_env;
 };
 
+DCL_VE_OWNER_PROTO(SKB, SLAB, struct sk_buff, owner_env, , (noinline, regparm(1)))
+
 #ifdef __KERNEL__
 /*
  *	Handling routines are only of interest to the kernel
  */
 #include <linux/slab.h>
+#include <ub/ub_net.h>
 
 #include <asm/system.h>
 
@@ -921,6 +929,8 @@ static inline int pskb_trim(struct sk_bu
  */
 static inline void skb_orphan(struct sk_buff *skb)
 {
+	ub_skb_uncharge(skb);
+
 	if (skb->destructor)
 		skb->destructor(skb);
 	skb->destructor = NULL;
diff -Nurap linux-2.6.9-100.orig/include/linux/slab.h linux-2.6.9-ve023stab054/include/linux/slab.h
--- linux-2.6.9-100.orig/include/linux/slab.h	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/slab.h	2011-06-15 19:26:22.000000000 +0400
@@ -48,6 +48,27 @@ typedef struct kmem_cache_s kmem_cache_t
 #define SLAB_PANIC		0x00040000UL	/* panic if kmem_cache_create() fails */
 #define SLAB_DESTROY_BY_RCU	0x00080000UL	/* defer freeing pages to RCU */
 
+/*
+ * allocation rules:                            __GFP_UBC       0
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *  cache (SLAB_UBC)				charge		charge
+ *				      (usual caches: mm, vma, task_struct, ...)
+ *
+ *  cache (SLAB_UBC | SLAB_NO_CHARGE)		charge		---
+ *					     (ub_kmalloc)    (kmalloc)
+ *
+ *  cache (no UB flags)				BUG()		---
+ *							(nonub caches, mempools)
+ *
+ *  pages					charge		---
+ *					   (ub_vmalloc,	      (vmalloc,
+ *				        poll, fdsets, ...)  non-ub allocs)
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#define SLAB_UBC		0x20000000UL	/* alloc space for ubs ... */
+#define SLAB_NO_CHARGE		0x40000000UL	/* ... but don't charge */
+
+
 /* flags passed to a constructor func */
 #define	SLAB_CTOR_CONSTRUCTOR	0x001UL		/* if not set, then deconstructor */
 #define SLAB_CTOR_ATOMIC	0x002UL		/* tell constructor it can't sleep */
@@ -73,11 +94,12 @@ struct cache_sizes {
 	kmem_cache_t	*cs_dmacachep;
 };
 extern struct cache_sizes malloc_sizes[];
+extern int malloc_cache_num;
 extern void *__kmalloc(size_t, int);
 
 static inline void *kmalloc(size_t size, int flags)
 {
-	if (__builtin_constant_p(size)) {
+	if (__builtin_constant_p(size) && __builtin_constant_p(flags)) {
 		int i = 0;
 #define CACHE(x) \
 		if (size <= x) \
@@ -91,6 +113,8 @@ static inline void *kmalloc(size_t size,
 			__you_cannot_kmalloc_that_much();
 		}
 found:
+		if (flags & __GFP_UBC)
+			i += malloc_cache_num;
 		return kmem_cache_alloc((flags & GFP_DMA) ?
 			malloc_sizes[i].cs_dmacachep :
 			malloc_sizes[i].cs_cachep, flags);
@@ -129,6 +153,8 @@ void pgd_ctor(void *, kmem_cache_t *, un
 void pgd_dtor(void *, kmem_cache_t *, unsigned long);
 
 
+void show_slab_info(void);
+
 #endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_SLAB_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/smp.h linux-2.6.9-ve023stab054/include/linux/smp.h
--- linux-2.6.9-100.orig/include/linux/smp.h	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/smp.h	2011-06-15 19:26:22.000000000 +0400
@@ -8,6 +8,9 @@
 
 #include <linux/config.h>
 
+struct pt_regs;
+typedef void (*smp_nmi_function)(struct pt_regs *regs, void *info);
+
 #ifdef CONFIG_SMP
 
 #include <linux/preempt.h>
@@ -49,6 +52,8 @@ extern int __cpu_up(unsigned int cpunum)
  */
 extern void smp_cpus_done(unsigned int max_cpus);
 
+extern int smp_nmi_call_function(smp_nmi_function func, void *info, int wait);
+
 /*
  * Call a function on all other processors
  */
@@ -116,6 +121,12 @@ static inline void smp_send_reschedule(i
 #define num_booting_cpus()			1
 #define smp_prepare_boot_cpu()			do {} while (0)
 
+static inline int smp_nmi_call_function(smp_nmi_function func,
+					 void *info, int wait)
+{
+	return 0;
+}
+
 #endif /* !SMP */
 
 #define get_cpu()		({ preempt_disable(); smp_processor_id(); })
diff -Nurap linux-2.6.9-100.orig/include/linux/socket.h linux-2.6.9-ve023stab054/include/linux/socket.h
--- linux-2.6.9-100.orig/include/linux/socket.h	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/socket.h	2011-06-15 19:26:19.000000000 +0400
@@ -290,6 +290,7 @@ extern void memcpy_tokerneliovec(struct 
 extern int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen);
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+extern int vz_security_proto_check(int family, int type, int protocol);
 
 struct socket;
 struct file * sock_map_file(struct socket *sock);
diff -Nurap linux-2.6.9-100.orig/include/linux/stop_machine.h linux-2.6.9-ve023stab054/include/linux/stop_machine.h
--- linux-2.6.9-100.orig/include/linux/stop_machine.h	2004-10-19 01:53:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/stop_machine.h	2011-06-15 19:26:22.000000000 +0400
@@ -8,6 +8,8 @@
 #include <linux/cpu.h>
 #include <asm/system.h>
 
+void stop_machine_show_state(void);
+
 #ifdef CONFIG_SMP
 /**
  * stop_machine_run: freeze the machine on all CPUs and run this function
diff -Nurap linux-2.6.9-100.orig/include/linux/sunrpc/debug.h linux-2.6.9-ve023stab054/include/linux/sunrpc/debug.h
--- linux-2.6.9-100.orig/include/linux/sunrpc/debug.h	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/sunrpc/debug.h	2011-06-15 19:26:22.000000000 +0400
@@ -96,6 +96,7 @@ enum {
 	CTL_SLOTTABLE_TCP,
 	CTL_MIN_RESVPORT,
 	CTL_MAX_RESVPORT,
+	CTL_VE_ALLOW_RPC,
 };
 
 #endif /* _LINUX_SUNRPC_DEBUG_H_ */
diff -Nurap linux-2.6.9-100.orig/include/linux/sunrpc/xprt.h linux-2.6.9-ve023stab054/include/linux/sunrpc/xprt.h
--- linux-2.6.9-100.orig/include/linux/sunrpc/xprt.h	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/sunrpc/xprt.h	2011-06-15 19:26:18.000000000 +0400
@@ -57,7 +57,7 @@ extern unsigned int xprt_tcp_slot_table_
  * Delay an arbitrary number of seconds before attempting to reconnect
  * after an error.
  */
-#define RPC_REESTABLISH_TIMEOUT	(15*HZ)
+#define RPC_REESTABLISH_TIMEOUT	(HZ/10)
 
 /* RPC call and reply header size as number of 32bit words (verifier
  * size computed separately)
diff -Nurap linux-2.6.9-100.orig/include/linux/suspend.h linux-2.6.9-ve023stab054/include/linux/suspend.h
--- linux-2.6.9-100.orig/include/linux/suspend.h	2004-10-19 01:53:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/suspend.h	2011-06-15 19:26:18.000000000 +0400
@@ -45,7 +45,7 @@ static inline int software_suspend(void)
 
 
 #ifdef CONFIG_PM
-extern void refrigerator(unsigned long);
+extern void refrigerator(void);
 extern int freeze_processes(void);
 extern void thaw_processes(void);
 
@@ -53,7 +53,7 @@ extern int pm_prepare_console(void);
 extern void pm_restore_console(void);
 
 #else
-static inline void refrigerator(unsigned long flag) {}
+static inline void refrigerator(void) {}
 #endif	/* CONFIG_PM */
 
 #ifdef CONFIG_SMP
diff -Nurap linux-2.6.9-100.orig/include/linux/swap.h linux-2.6.9-ve023stab054/include/linux/swap.h
--- linux-2.6.9-100.orig/include/linux/swap.h	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/swap.h	2011-06-15 19:26:21.000000000 +0400
@@ -15,6 +15,7 @@ struct notifier_block;
 #define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
 #define SWAP_FLAG_PRIO_MASK	0x7fff
 #define SWAP_FLAG_PRIO_SHIFT	0
+#define SWAP_FLAG_READONLY	0x40000000	/* set if swap is read-only */
 
 static inline int current_is_kswapd(void)
 {
@@ -81,6 +82,7 @@ struct address_space;
 struct sysinfo;
 struct writeback_control;
 struct zone;
+struct user_beancounter;
 
 /*
  * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of
@@ -108,6 +110,7 @@ enum {
 	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
 	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap?	*/
 	SWP_ACTIVE	= (SWP_USED | SWP_WRITEOK),
+	SWP_READONLY	= (1 << 2)
 };
 
 #define SWAP_CLUSTER_MAX 32
@@ -120,6 +123,8 @@ enum {
  * extent_list.prev points at the lowest-index extent.  That list is
  * sorted.
  */
+struct user_beancounter;
+
 struct swap_info_struct {
 	unsigned int flags;
 	spinlock_t sdev_lock;
@@ -134,6 +139,7 @@ struct swap_info_struct {
 	unsigned int highest_bit;
 	unsigned int cluster_next;
 	unsigned int cluster_nr;
+	struct user_beancounter **owner_map;	
 	int prio;			/* swap priority */
 	int pages;
 	unsigned long max;
@@ -150,7 +156,8 @@ struct swap_list_t {
 #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
 
 /* linux/mm/oom_kill.c */
-extern void out_of_memory(int gfp_mask);
+struct oom_freeing_stat;
+extern void out_of_memory(struct oom_freeing_stat *, int gfp_mask);
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
 
@@ -165,6 +172,7 @@ extern unsigned int nr_free_pages(void);
 extern unsigned int nr_free_pages_pgdat(pg_data_t *pgdat);
 extern unsigned int nr_free_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
+extern unsigned int nr_lowmem_pages(void);
 
 /* linux/mm/swap.c */
 extern int pagecache_maxpercent;
@@ -204,6 +212,7 @@ extern struct address_space swapper_spac
 #define total_swapcache_pages  swapper_space.nrpages
 extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *);
+extern int add_to_swap_cache(struct page *page, swp_entry_t entry);
 extern void __delete_from_swap_cache(struct page *);
 extern void delete_from_swap_cache(struct page *);
 extern int move_to_swap_cache(struct page *, swp_entry_t);
@@ -219,7 +228,7 @@ extern long total_swap_pages;
 extern unsigned int nr_swapfiles;
 extern struct swap_info_struct swap_info[];
 extern void si_swapinfo(struct sysinfo *);
-extern swp_entry_t get_swap_page(void);
+extern swp_entry_t get_swap_page(struct user_beancounter *);
 extern int swap_duplicate(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
 extern void swap_free(swp_entry_t);
@@ -228,6 +237,7 @@ extern sector_t map_swap_page(struct swa
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
 extern int can_share_swap_page(struct page *);
 extern int remove_exclusive_swap_page(struct page *);
+extern int try_to_remove_exclusive_swap_page(struct page *);
 struct backing_dev_info;
 
 extern struct swap_list_t swap_list;
@@ -285,7 +295,7 @@ static inline int remove_exclusive_swap_
 	return 0;
 }
 
-static inline swp_entry_t get_swap_page(void)
+static inline swp_entry_t get_swap_page(struct user_beancounter *ub)
 {
 	swp_entry_t entry;
 	entry.val = 0;
diff -Nurap linux-2.6.9-100.orig/include/linux/sysctl.h linux-2.6.9-ve023stab054/include/linux/sysctl.h
--- linux-2.6.9-100.orig/include/linux/sysctl.h	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/sysctl.h	2011-06-15 19:26:22.000000000 +0400
@@ -141,6 +141,19 @@ enum
 	KERN_PANIC_ON_NMI=69,	/* int: whether we will panic on an unrecovered */
 	KERN_WAKE_BALANCE=70,   /* int: behavior of load balancing on wakeup */
 	KERN_IA64_UNALIGNED=71, /* int: ia64 unaligned userland trap enable */
+	KERN_SILENCE_LEVEL=100, /* int: Console silence loglevel */
+	KERN_ALLOC_FAIL_WARN=101, /* int: whether we'll print "alloc failure" */
+	KERN_SYSRQ_KEY_SCANCODE=102, /* int: sysrq key */
+	KERN_VCPU_HOT_TIMESLICE=200,
+	KERN_FAIRSCHED_MAX_LATENCY=201, /* int: Max start_tag delta */
+	KERN_VCPU_SCHED_TIMESLICE=202,
+	KERN_VCPU_TIMESLICE=203,
+	KERN_VIRT_PIDS=204,	/* int: VE pids virtualization */
+	KERN_VIRT_OSRELEASE=205,/* virtualization of utsname.release */
+	KERN_SCALE_VCPU_FREQUENCY=206,	/* Scale cpu frequency inside VE */
+	KERN_VE_ALLOW_KTHREADS=207,
+	KERN_VE_MEMINFO=208,	/* int: use privvmpages(0) or oomguarpages(1) */
+	KERN_VE_ALLOW_INIT_SIGNALS=209,	/* int: CT's init rcvs signals or not */
 };
 
 
@@ -342,6 +355,7 @@ enum
 	NET_TCP_RMEM=85,
 	NET_TCP_APP_WIN=86,
 	NET_TCP_ADV_WIN_SCALE=87,
+	NET_TCP_USE_SG=245,
 	NET_IPV4_NONLOCAL_BIND=88,
 	NET_IPV4_ICMP_RATELIMIT=89,
 	NET_IPV4_ICMP_RATEMASK=90,
@@ -366,10 +380,13 @@ enum
 	NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
 	NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=110,
 	NET_TCP_SLOW_START_AFTER_IDLE=111,
+	NET_TCP_MAX_TW_BUCKETS_VE=151,
+	NET_TCP_MAX_TW_KMEM_FRACTION=152,
 };
 
 enum {
 	NET_IPV4_ROUTE_FLUSH=1,
+	NET_IPV4_ROUTE_SRC_CHECK=188,
 	NET_IPV4_ROUTE_MIN_DELAY=2,
 	NET_IPV4_ROUTE_MAX_DELAY=3,
 	NET_IPV4_ROUTE_GC_THRESH=4,
@@ -745,6 +762,12 @@ enum
 	FS_XFS=17,	/* struct: control xfs parameters */
 	FS_AIO_NR=18,	/* current system-wide number of aio requests */
 	FS_AIO_MAX_NR=19,	/* system-wide maximum number of aio requests */
+	FS_AT_VSYSCALL=20,	/* int: to announce vsyscall data */
+};
+
+/* /proc/sys/debug */
+enum {
+	DBG_DECODE_CALLTRACES = 1,	/* int: decode call traces on oops */
 };
 
 /* /proc/sys/fs/quota/ */
@@ -847,6 +870,9 @@ enum
 
 #ifdef __KERNEL__
 
+extern int ve_allow_kthreads;
+extern int ve_allow_init_signals;
+
 extern void sysctl_init(void);
 
 typedef struct ctl_table ctl_table;
@@ -875,6 +901,8 @@ extern int proc_doulongvec_minmax(ctl_ta
 				  void __user *, size_t *, loff_t *);
 extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int,
 				      struct file *, void __user *, size_t *, loff_t *);
+extern int proc_doutsstring(ctl_table *table, int write, struct file *,
+			    void __user *, size_t *, loff_t *);
 
 extern int do_sysctl (int __user *name, int nlen,
 		      void __user *oldval, size_t __user *oldlenp,
@@ -928,6 +956,8 @@ extern ctl_handler sysctl_jiffies;
  */
 
 /* A sysctl table is an array of struct ctl_table: */
+struct ve_struct;
+
 struct ctl_table 
 {
 	int ctl_name;			/* Binary ID */
@@ -941,6 +971,7 @@ struct ctl_table 
 	struct proc_dir_entry *de;	/* /proc control block */
 	void *extra1;
 	void *extra2;
+	struct ve_struct *owner_env;
 };
 
 /* struct ctl_table_header is used to maintain dynamic lists of
@@ -959,6 +990,9 @@ struct ctl_table_header * register_sysct
 						int insert_at_head);
 void unregister_sysctl_table(struct ctl_table_header * table);
 
+ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr);
+void free_sysctl_clone(ctl_table *clone);
+
 #else /* __KERNEL__ */
 
 #endif /* __KERNEL__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/sysrq.h linux-2.6.9-ve023stab054/include/linux/sysrq.h
--- linux-2.6.9-100.orig/include/linux/sysrq.h	2004-10-19 01:53:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/sysrq.h	2011-06-15 19:26:18.000000000 +0400
@@ -29,6 +29,12 @@ struct sysrq_key_op {
  * are available -- else NULL's).
  */
 
+#ifdef CONFIG_SYSRQ_DEBUG
+int sysrq_eat_all(void);
+#else
+#define sysrq_eat_all()	(0)
+#endif
+
 void handle_sysrq(int, struct pt_regs *, struct tty_struct *);
 void __handle_sysrq(int, struct pt_regs *, struct tty_struct *);
 
diff -Nurap linux-2.6.9-100.orig/include/linux/tcp.h linux-2.6.9-ve023stab054/include/linux/tcp.h
--- linux-2.6.9-100.orig/include/linux/tcp.h	2004-10-19 01:54:08.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/tcp.h	2011-06-15 19:26:19.000000000 +0400
@@ -212,6 +212,27 @@ enum tcp_congestion_algo {
 	TCP_BIC,
 };
 
+struct tcp_options_received {
+	/*	PAWS/RTTM data	*/
+	long	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
+	__u32	ts_recent;	/* Time stamp to echo next		*/
+	__u32	rcv_tsval;	/* Time stamp value             	*/
+	__u32	rcv_tsecr;	/* Time stamp echo reply        	*/
+	char	saw_tstamp;	/* Saw TIMESTAMP on last packet		*/
+	char	tstamp_ok;	/* TIMESTAMP seen on SYN packet		*/
+	char	sack_ok;	/* SACK seen on SYN packet		*/
+	char	wscale_ok;	/* Wscale seen on SYN packet		*/
+	__u8	snd_wscale;	/* Window scaling received from sender	*/
+	__u8	rcv_wscale;	/* Window scaling to send to receiver	*/
+	/*	SACKs data	*/
+	__u8	dsack;		/* D-SACK is scheduled			*/
+	__u8	eff_sacks;	/* Size of SACK array to send with next packet */
+	__u8	num_sacks;	/* Number of SACK blocks		*/
+	__u8	__pad;
+	__u16	user_mss;  	/* mss requested by user in ioctl */
+	__u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
+};
+
 struct tcp_opt {
 	int	tcp_header_len;	/* Bytes of tcp header to send		*/
 
@@ -262,22 +283,19 @@ struct tcp_opt {
 	__u32	pmtu_cookie;	/* Last pmtu seen by socket		*/
 	__u32	mss_cache;	/* Cached effective mss, not including SACKS */
 	__u16	mss_cache_std;	/* Like mss_cache, but without TSO */
-	__u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 	__u16	ext_header_len;	/* Network protocol overhead (IP/IPv6 options) */
 	__u16	ext2_header_len;/* Options depending on route */
 	__u8	ca_state;	/* State of fast-retransmit machine 	*/
 	__u8	retransmits;	/* Number of unrecovered RTO timeouts.	*/
+	__u32	frto_highmark;	/* snd_nxt when RTO occurred */
 
 	__u8	reordering;	/* Packet reordering metric.		*/
 	__u8	frto_counter;	/* Number of new acks after RTO */
-	__u32	frto_highmark;	/* snd_nxt when RTO occurred */
 
 	__u8	adv_cong;	/* Using Vegas, Westwood, or BIC */
 	__u8	defer_accept;	/* User waits for some data after accept() */
-	/* one byte hole, try to pack */
 
 /* RTT measurement */
-	__u8	backoff;	/* backoff				*/
 	__u32	srtt;		/* smoothed round trip time << 3	*/
 	__u32	mdev;		/* medium deviation			*/
 	__u32	mdev_max;	/* maximal mdev for the last rtt period	*/
@@ -288,6 +306,15 @@ struct tcp_opt {
 	tcp_pcount_t packets_out; /* Packets which are "in flight"	*/
 	tcp_pcount_t left_out;	  /* Packets which leaved network	*/
 	tcp_pcount_t retrans_out; /* Retransmitted packets out		*/
+	__u8	backoff;	/* backoff				*/
+/*
+ *      Options received (usually on last packet, some only on SYN packets).
+ */
+	__u8	nonagle;	/* Disable Nagle algorithm?             */
+	__u8	keepalive_probes; /* num of allowed keep alive probes	*/
+
+	__u8	probes_out;	/* unanswered 0 window probes		*/
+	struct tcp_options_received rx_opt;
 
 
 /*
@@ -314,40 +341,19 @@ struct tcp_opt {
 	__u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
 	__u32	pushed_seq;	/* Last pushed seq, required to talk to windows */
 	__u32	copied_seq;	/* Head of yet unread data		*/
-/*
- *      Options received (usually on last packet, some only on SYN packets).
- */
-	char	tstamp_ok,	/* TIMESTAMP seen on SYN packet		*/
-		wscale_ok,	/* Wscale seen on SYN packet		*/
-		sack_ok;	/* SACK seen on SYN packet		*/
-	char	saw_tstamp;	/* Saw TIMESTAMP on last packet		*/
-        __u8	snd_wscale;	/* Window scaling received from sender	*/
-        __u8	rcv_wscale;	/* Window scaling to send to receiver	*/
-	__u8	nonagle;	/* Disable Nagle algorithm?             */
-	__u8	keepalive_probes; /* num of allowed keep alive probes	*/
-
-/*	PAWS/RTTM data	*/
-        __u32	rcv_tsval;	/* Time stamp value             	*/
-        __u32	rcv_tsecr;	/* Time stamp echo reply        	*/
-        __u32	ts_recent;	/* Time stamp to echo next		*/
-        long	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
 
 /*	SACKs data	*/
-	__u16	user_mss;  	/* mss requested by user in ioctl */
-	__u8	dsack;		/* D-SACK is scheduled			*/
-	__u8	eff_sacks;	/* Size of SACK array to send with next packet */
 	struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
 	struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
 
 	__u32	window_clamp;	/* Maximal window to advertise		*/
 	__u32	rcv_ssthresh;	/* Current window clamp			*/
-	__u8	probes_out;	/* unanswered 0 window probes		*/
-	__u8	num_sacks;	/* Number of SACK blocks		*/
 	__u16	advmss;		/* Advertised MSS			*/
 
 	__u8	syn_retries;	/* num of allowed syn retries */
 	__u8	ecn_flags;	/* ECN status bits.			*/
 	__u16	prior_ssthresh; /* ssthresh saved at recovery start	*/
+ 	__u16	__pad1;
 	tcp_pcount_t lost_out;	/* Lost packets			*/
 	tcp_pcount_t sacked_out;/* SACK'd packets			*/
 	tcp_pcount_t fackets_out;/* FACK'd packets			*/
diff -Nurap linux-2.6.9-100.orig/include/linux/tty.h linux-2.6.9-ve023stab054/include/linux/tty.h
--- linux-2.6.9-100.orig/include/linux/tty.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/tty.h	2011-06-15 19:26:19.000000000 +0400
@@ -239,6 +239,8 @@ struct device;
  * size each time the window is created or resized anyway.
  * 						- TYT, 9/14/92
  */
+struct user_beancounter;
+
 struct tty_struct {
 	int	magic;
 	struct tty_driver *driver;
@@ -294,8 +296,12 @@ struct tty_struct {
 	spinlock_t read_lock;
 	/* If the tty has a pending do_SAK, queue it here - akpm */
 	struct work_struct SAK_work;
+	struct ve_struct *owner_env;
 };
 
+DCL_VE_OWNER_PROTO(TTY, TAIL_SOFT, struct tty_struct, owner_env, , ())
+#define tty_ub(__tty) (slab_ub(__tty))
+
 /* tty magic number */
 #define TTY_MAGIC		0x5401
 
@@ -322,6 +328,7 @@ struct tty_struct {
 #define TTY_PTY_LOCK 		16	/* pty private */
 #define TTY_NO_WRITE_SPLIT 	17	/* Preserve write boundaries to driver */
 #define TTY_HUPPED 		18	/* Post driver->hangup() */
+#define TTY_CHARGED		20	/* Charged as ub resource */
 
 #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
 
diff -Nurap linux-2.6.9-100.orig/include/linux/tty_driver.h linux-2.6.9-ve023stab054/include/linux/tty_driver.h
--- linux-2.6.9-100.orig/include/linux/tty_driver.h	2004-10-19 01:54:37.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/tty_driver.h	2011-06-15 19:26:20.000000000 +0400
@@ -115,6 +115,7 @@
  * 	character to the device.
  */
 
+#include <linux/ve_owner.h>
 #include <linux/fs.h>
 #include <linux/list.h>
 #include <linux/cdev.h>
@@ -214,14 +215,21 @@ struct tty_driver {
 			unsigned int set, unsigned int clear);
 
 	struct list_head tty_drivers;
+	struct ve_struct *owner_env;
 };
 
+DCL_VE_OWNER_PROTO(TTYDRV, TAIL_SOFT, struct tty_driver, owner_env, , ())
+
 extern struct list_head tty_drivers;
+extern rwlock_t tty_driver_guard;
 
 struct tty_driver *alloc_tty_driver(int lines);
 void put_tty_driver(struct tty_driver *driver);
 void tty_set_operations(struct tty_driver *driver, struct tty_operations *op);
 
+struct class_simple *init_ve_tty_class(void);
+void fini_ve_tty_class(struct class_simple *ve_tty_class);
+
 /* tty driver magic number */
 #define TTY_DRIVER_MAGIC		0x5402
 
diff -Nurap linux-2.6.9-100.orig/include/linux/ve.h linux-2.6.9-ve023stab054/include/linux/ve.h
--- linux-2.6.9-100.orig/include/linux/ve.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/ve.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,343 @@
+/*
+ *  include/linux/ve.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_VE_H
+#define _LINUX_VE_H
+
+#include <linux/config.h>
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+#include <linux/types.h>
+#include <linux/capability.h>
+#include <linux/utsname.h>
+#include <linux/sysctl.h>
+#include <linux/vzstat.h>
+#include <linux/kobject.h>
+
+#ifdef VZMON_DEBUG
+#  define VZTRACE(fmt,args...) \
+	printk(KERN_DEBUG fmt, ##args)
+#else
+#  define VZTRACE(fmt,args...)
+#endif /* VZMON_DEBUG */
+
+struct tty_driver;
+struct devpts_config;
+struct task_struct;
+struct new_utsname;
+struct file_system_type;
+struct icmp_mib;
+struct ip_mib;
+struct tcp_mib;
+struct udp_mib;
+struct linux_mib;
+struct fib_info;
+struct fib_rule;
+struct veip_struct;
+struct ve_monitor;
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+struct fib_table;
+struct devcnfv4_struct;
+#ifdef CONFIG_VE_IPTABLES
+struct ipt_filter_initial_table;
+struct ipt_nat_initial_table;
+struct ipt_table;
+struct ip_conntrack;
+struct nf_hook_ops;
+struct ve_ip_conntrack {
+	struct list_head 	*_ip_conntrack_hash;
+	struct list_head	_ip_conntrack_expect_list;
+	struct list_head	_ip_conntrack_helpers;
+	struct ip_conntrack_protocol	**_ip_ct_protos;
+	int 			_ip_conntrack_max;
+	unsigned long		_ip_ct_tcp_timeouts[10];
+	unsigned long		_ip_ct_udp_timeout;
+	unsigned long		_ip_ct_udp_timeout_stream;
+	unsigned long		_ip_ct_icmp_timeout;
+	unsigned long		_ip_ct_generic_timeout;
+	atomic_t		_ip_conntrack_count;
+	void (*_ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
+#ifdef CONFIG_SYSCTL
+	unsigned int		_ip_ct_log_invalid;
+	unsigned long		_ip_ct_tcp_timeout_max_retrans;
+	int			_ip_ct_tcp_loose;
+	int			_ip_ct_tcp_be_liberal;
+	int			_ip_ct_tcp_max_retrans;
+	struct ctl_table_header *_ip_ct_sysctl_header;
+	ctl_table		*_ip_ct_net_table;
+	ctl_table		*_ip_ct_ipv4_table;
+	ctl_table		*_ip_ct_netfilter_table;
+	ctl_table		*_ip_ct_sysctl_table;
+#endif /*CONFIG_SYSCTL*/
+
+	int			_ip_conntrack_ftp_ports_c;
+	int			_ip_conntrack_irc_ports_c;
+
+	struct list_head	_ip_nat_helpers;
+	struct list_head	*_ip_nat_bysource;
+	struct ipt_nat_initial_table *_ip_nat_initial_table;
+	struct ipt_table	*_ip_nat_table;
+	struct ip_nat_protocol	**_ip_nat_protos;
+
+	int			_ip_nat_ftp_ports_c;
+	int			_ip_nat_irc_ports_c;
+
+	/* resource accounting */
+	struct user_beancounter *ub;
+};
+#endif
+#endif
+
+#define UIDHASH_BITS_VE		6
+#define UIDHASH_SZ_VE		(1 << UIDHASH_BITS_VE)
+
+struct ve_cpu_stats {
+	cycles_t	idle_time;
+	cycles_t	iowait_time;
+	cycles_t	strt_idle_time;
+	cycles_t	used_time;
+	seqcount_t	stat_lock;
+	int		nr_running;
+	int		nr_unint;
+	u64		user;
+	u64		nice;
+	u64		system;
+} ____cacheline_aligned;
+
+struct ve_struct {
+	struct ve_struct	*prev;
+	struct ve_struct	*next;
+
+	envid_t			veid;
+	struct task_struct	*init_entry;
+	struct list_head	vetask_lh;
+	kernel_cap_t		cap_default;
+	atomic_t		pcounter;
+	/* ref counter to ve from ipc */
+	atomic_t		counter;	
+	unsigned int		class_id;
+	struct veip_struct	*veip;
+	struct rw_semaphore	op_sem;
+	int			is_running;
+	int			is_locked;
+	atomic_t		suspend;
+	int			virt_pids;
+	unsigned int		flags;
+	/* see vzcalluser.h for VE_FEATURE_XXX definitions */
+	__u64			features;
+
+/* VE's root */
+	struct vfsmount 	*fs_rootmnt;
+	struct dentry 		*fs_root;
+
+/* sysctl */
+	struct new_utsname	*utsname;
+	struct list_head	sysctl_lh;
+	struct ctl_table_header	*kern_header;
+	struct ctl_table	*kern_table;
+	struct ctl_table_header	*quota_header;
+	struct ctl_table	*quota_table;
+	struct file_system_type *proc_fstype;
+	struct vfsmount		*proc_mnt;
+	struct proc_dir_entry	*proc_root;
+	struct proc_dir_entry	*proc_sys_root;
+
+/* SYSV IPC */
+	struct ipc_ids		*_shm_ids;
+	struct ipc_ids		*_msg_ids;
+	struct ipc_ids		*_sem_ids;
+	int			_used_sems;
+	int			_shm_tot;
+	size_t			_shm_ctlmax;
+	size_t			_shm_ctlall;
+	int			_shm_ctlmni;
+	int			_msg_ctlmax;
+	int			_msg_ctlmni;
+	int			_msg_ctlmnb;
+	atomic_t		_msg_bytes;
+	atomic_t		_msg_hdrs;
+	int			_sem_ctls[4];
+
+/* BSD pty's */
+	struct tty_driver       *pty_driver;
+	struct tty_driver       *pty_slave_driver;
+
+#ifdef CONFIG_UNIX98_PTYS
+	struct tty_driver	*ptm_driver;
+	struct tty_driver	*pts_driver;
+	struct idr		*allocated_ptys;
+#endif
+	struct file_system_type *devpts_fstype;
+	struct vfsmount		*devpts_mnt;
+	struct dentry		*devpts_root;
+	struct devpts_config	*devpts_config;
+
+	struct file_system_type *shmem_fstype;
+	struct vfsmount		*shmem_mnt;
+#ifdef CONFIG_SYSFS
+	struct file_system_type *sysfs_fstype;
+	struct vfsmount		*sysfs_mnt;
+	struct super_block	*sysfs_sb;
+	struct sysfs_dirent	*sysfs_root;
+#endif
+	struct subsystem	*class_subsys;
+	struct subsystem	*class_obj_subsys;
+	struct list_head	_simple_dev_list;
+	struct class		*net_class;
+	struct class_simple	*tty_class;
+
+/* User uids hash */
+	struct list_head	uidhash_table[UIDHASH_SZ_VE];
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	struct hlist_head	_net_dev_head;
+	struct hlist_head	_net_dev_index_head;
+	struct net_device	*_net_dev_base, **_net_dev_tail;
+	int			ifindex;
+	struct net_device	*_loopback_dev;
+	struct net_device_stats	*_loopback_stats;
+	struct net_device	*_venet_dev;
+	struct ipv4_devconf	*_ipv4_devconf;
+	struct ipv4_devconf	*_ipv4_devconf_dflt;
+	struct ctl_table_header	*forward_header;
+	struct ctl_table	*forward_table;
+#endif
+ 	unsigned long		rt_flush_required;
+
+	struct neigh_table	*ve_nd_tbl;
+	struct neigh_table	*ve_arp_tbl;
+
+/* per VE CPU stats*/
+	struct timespec		start_timespec;
+	u64			start_jiffies;	/* Deprecated */
+	cycles_t 		start_cycles;
+	unsigned long		avenrun[3];	/* loadavg data */
+
+	cycles_t 		cpu_used_ve;
+	struct kstat_lat_pcpu_struct	sched_lat_ve;
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	struct hlist_head	*_fib_info_hash;
+	struct hlist_head	*_fib_info_laddrhash;
+	int			_fib_hash_size;
+	int			_fib_info_cnt;
+
+	struct fib_rule		*_local_rule;
+	struct fib_rule		*_fib_rules;
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	/* XXX: why a magic constant? */
+	struct fib_table 	*_fib_tables[256]; /* RT_TABLE_MAX - for now */
+#else
+	struct fib_table	*_main_table;
+	struct fib_table	*_local_table;
+#endif
+	struct icmp_mib		*_icmp_statistics[2];
+	struct ipstats_mib	*_ip_statistics[2];
+	struct tcp_mib		*_tcp_statistics[2];
+	struct udp_mib		*_udp_statistics[2];
+	struct linux_mib	*_net_statistics[2];
+	struct venet_stat       *stat;
+#ifdef CONFIG_VE_IPTABLES
+/* core/netfilter.c virtualization */
+	void			*_nf_hooks;
+	struct ipt_filter_initial_table	*_ipt_filter_initial_table; /* initial_table struct */
+	struct ipt_table	*_ve_ipt_filter_pf; /* packet_filter struct */
+	struct nf_hook_ops	*_ve_ipt_filter_io; /* ipt_ops struct */
+	struct ipt_table	*_ipt_mangle_table;
+	struct nf_hook_ops	*_ipt_mangle_hooks;
+	struct list_head	*_ipt_target;
+	struct list_head	*_ipt_match;
+	struct list_head	*_ipt_tables;
+
+	struct ipt_target 	*_ipt_standard_target;
+	struct ipt_target 	*_ipt_error_target;
+	struct ipt_match 	*_tcp_matchstruct;
+	struct ipt_match 	*_udp_matchstruct;
+	struct ipt_match 	*_icmp_matchstruct;
+
+	__u64			_iptables_modules;
+	struct ve_ip_conntrack	*_ip_conntrack;
+#endif /* CONFIG_VE_IPTABLES */
+#endif
+	wait_queue_head_t	*_log_wait;
+	unsigned long		*_log_start;
+	unsigned long		*_log_end;
+	unsigned long		*_logged_chars;
+	char			*log_buf;
+#define VE_DEFAULT_LOG_BUF_LEN	4096
+
+	struct ve_cpu_stats 	ve_cpu_stats[NR_CPUS] ____cacheline_aligned;
+	unsigned long		down_at;
+	struct list_head	cleanup_list;
+ 
+ 	unsigned long		jiffies_fixup;
+ 	unsigned char		disable_net;
+ 	unsigned char		sparse_vpid;
+	struct ve_monitor	*monitor;
+	struct proc_dir_entry	*monitor_proc;
+	unsigned long		meminfo_val;
+};
+
+enum {
+	VE_REBOOT,
+};
+
+#define VE_CPU_STATS(ve, cpu) (&((ve)->ve_cpu_stats[(cpu)]))
+
+extern int nr_ve;
+
+#ifdef CONFIG_VE
+
+int get_device_perms_ve(int dev_type, dev_t dev, int access_mode);
+void do_env_cleanup(struct ve_struct *envid);
+void do_update_load_avg_ve(void);
+void do_env_free(struct ve_struct *ptr);
+
+#define ve_utsname (*get_exec_env()->utsname)
+
+static inline struct ve_struct *get_ve(struct ve_struct *ptr)
+{
+	if (ptr != NULL)
+		atomic_inc(&ptr->counter);
+	return ptr;
+}
+
+static inline void put_ve(struct ve_struct *ptr)
+{
+	if (ptr && atomic_dec_and_test(&ptr->counter)) {
+		if (atomic_read(&ptr->pcounter) > 0)
+			BUG();
+		if (ptr->is_running)
+			BUG();
+		do_env_free(ptr);
+	}
+}
+
+extern unsigned long long ve_relative_clock(struct timespec * ts);
+
+extern int sysctl_fsync_enable;
+
+#ifdef CONFIG_FAIRSCHED
+#define ve_cpu_online_map(ve, mask) fairsched_cpu_online_map(ve->veid, mask)
+#else
+#define ve_cpu_online_map(ve, mask)	do { *mask = cpu_online_map; } while (0)
+#endif
+#else	/* CONFIG_VE */
+#define ve_utsname	system_utsname
+#define get_ve(ve)	(NULL)
+#define put_ve(ve)	do { } while (0)
+#endif	/* CONFIG_VE */
+
+#endif /* _LINUX_VE_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/ve_owner.h linux-2.6.9-ve023stab054/include/linux/ve_owner.h
--- linux-2.6.9-100.orig/include/linux/ve_owner.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/ve_owner.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,32 @@
+/*
+ *  include/linux/ve_proto.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_OWNER_H__
+#define __VE_OWNER_H__
+
+#include <linux/config.h>
+#include <linux/vmalloc.h>
+
+
+#define DCL_VE_OWNER(name, kind, type, member, attr1, attr2)
+	/* prototype declares static inline functions */
+
+#define DCL_VE_OWNER_PROTO(name, kind, type, member, attr1, attr2)	\
+type;									\
+static inline struct ve_struct *VE_OWNER_##name(type *obj)		\
+{									\
+	return obj->member;						\
+}									\
+static inline void SET_VE_OWNER_##name(type *obj, struct ve_struct *ve)	\
+{									\
+	obj->member = ve;						\
+}
+
+#endif /* __VE_OWNER_H__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/ve_proto.h linux-2.6.9-ve023stab054/include/linux/ve_proto.h
--- linux-2.6.9-100.orig/include/linux/ve_proto.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/ve_proto.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,73 @@
+/*
+ *  include/linux/ve_proto.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_H__
+#define __VE_H__
+
+#ifdef CONFIG_VE
+
+extern struct semaphore ve_call_guard;
+extern rwlock_t ve_call_lock;
+
+#ifdef CONFIG_SYSVIPC
+extern void prepare_ipc(void);
+extern int init_ve_ipc(struct ve_struct *);
+extern void fini_ve_ipc(struct ve_struct *);
+extern void ve_ipc_cleanup(void);
+#endif
+
+extern struct tty_driver *get_pty_driver(void);
+extern struct tty_driver *get_pty_slave_driver(void);
+#ifdef CONFIG_UNIX98_PTYS
+extern struct tty_driver *ptm_driver;	/* Unix98 pty masters; for /dev/ptmx */
+extern struct tty_driver *pts_driver;	/* Unix98 pty slaves;  for /dev/ptmx */
+#endif
+
+extern rwlock_t  tty_driver_guard;
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+void ip_fragment_cleanup(struct ve_struct *envid);
+void tcp_v4_kill_ve_sockets(struct ve_struct *envid);
+struct fib_table * fib_hash_init(int id);
+int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr);
+extern int main_loopback_init(struct net_device*);
+int venet_init(void);
+#endif
+
+extern struct ve_struct *ve_list_head;
+extern rwlock_t ve_list_guard;
+extern struct ve_struct *get_ve_by_id(envid_t);
+extern struct ve_struct *__find_ve_by_id(envid_t);
+
+extern int do_setdevperms(envid_t veid, unsigned type,
+		dev_t dev, unsigned mask);
+
+#define VE_HOOK_INIT	0
+#define VE_HOOK_FINI	1
+#define VE_MAX_HOOKS	2
+
+typedef int ve_hookfn(unsigned int hooknum, void *data);
+
+struct ve_hook
+{
+	struct list_head list;
+	ve_hookfn *hook;
+	ve_hookfn *undo;
+	struct module *owner;
+	int hooknum;
+	/* Functions are called in ascending priority. */
+	int priority;
+};
+
+extern int ve_hook_register(struct ve_hook *vh);
+extern void ve_hook_unregister(struct ve_hook *vh);
+
+#endif
+#endif
diff -Nurap linux-2.6.9-100.orig/include/linux/ve_task.h linux-2.6.9-ve023stab054/include/linux/ve_task.h
--- linux-2.6.9-100.orig/include/linux/ve_task.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/ve_task.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,34 @@
+/*
+ *  include/linux/ve_task.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_TASK_H__
+#define __VE_TASK_H__
+
+#include <linux/seqlock.h>
+
+struct ve_task_info {
+/* virtualization */
+	struct ve_struct *owner_env;
+	struct ve_struct *exec_env;
+	struct list_head vetask_list;
+	struct dentry *glob_proc_dentry;
+/* statistics: scheduling latency */
+	cycles_t sleep_time;
+	cycles_t sched_time;
+	cycles_t sleep_stamp;
+	cycles_t wakeup_stamp;
+	seqcount_t wakeup_lock;
+};
+
+#define VE_TASK_INFO(task)	(&(task)->ve_task_info)
+#define VE_TASK_LIST_2_TASK(lh)	\
+	list_entry(lh, struct task_struct, ve_task_info.vetask_list)
+
+#endif /* __VE_TASK_H__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/venet.h linux-2.6.9-ve023stab054/include/linux/venet.h
--- linux-2.6.9-100.orig/include/linux/venet.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/venet.h	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,69 @@
+/*
+ *  include/linux/venet.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VENET_H
+#define _VENET_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/vzcalluser.h>
+
+#define VEIP_HASH_SZ 512
+
+struct ve_struct;
+struct venet_stat;
+struct ip_entry_struct
+{
+	__u32			ip;
+	struct ve_struct	*active_env;
+	struct venet_stat	*stat;
+	struct veip_struct	*veip;
+	struct list_head 	ip_hash;
+	struct list_head 	ve_list;
+};
+
+struct veip_struct
+{
+	struct list_head	src_lh;
+	struct list_head	dst_lh;
+	struct list_head	ip_lh;
+	struct list_head	list;
+	envid_t			veid;
+};
+
+/* veip_hash_lock should be taken for write by caller */
+void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip);
+/* veip_hash_lock should be taken for write by caller */
+void ip_entry_unhash(struct ip_entry_struct *entry);
+/* veip_hash_lock should be taken for read by caller */
+struct ip_entry_struct *ip_entry_lookup(u32 addr);
+
+/* veip_hash_lock should be taken for read by caller */
+struct veip_struct *veip_find(envid_t veid);
+/* veip_hash_lock should be taken for write by caller */
+struct veip_struct *veip_findcreate(envid_t veid);
+/* veip_hash_lock should be taken for write by caller */
+void veip_put(struct veip_struct *veip);
+
+int veip_start(struct ve_struct *ve);
+void veip_stop(struct ve_struct *ve);
+void veip_list_cleanup(struct veip_struct *veip);
+int veip_entry_add(struct ve_struct *ve, struct sockaddr_in *addr);
+int veip_entry_del(envid_t veid, struct sockaddr_in *addr);
+int venet_change_skb_owner(struct sk_buff *skb);
+
+extern struct list_head ip_entry_hash_table[];
+extern rwlock_t veip_hash_lock;
+
+#ifdef CONFIG_PROC_FS
+int veip_seq_show(struct seq_file *m, void *v);
+#endif
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/linux/veprintk.h linux-2.6.9-ve023stab054/include/linux/veprintk.h
--- linux-2.6.9-100.orig/include/linux/veprintk.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/veprintk.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,38 @@
+/*
+ *  include/linux/veprintk.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_PRINTK_H__
+#define __VE_PRINTK_H__
+
+#ifdef CONFIG_VE
+
+#define ve_log_wait		(*(get_exec_env()->_log_wait))
+#define ve_log_start		(*(get_exec_env()->_log_start))
+#define ve_log_end		(*(get_exec_env()->_log_end))
+#define ve_logged_chars		(*(get_exec_env()->_logged_chars))
+#define ve_log_buf		(get_exec_env()->log_buf)
+#define ve_log_buf_len		(ve_is_super(get_exec_env()) ? \
+				log_buf_len : VE_DEFAULT_LOG_BUF_LEN)
+#define VE_LOG_BUF_MASK		(ve_log_buf_len - 1)
+#define VE_LOG_BUF(idx)		(ve_log_buf[(idx) & VE_LOG_BUF_MASK])
+
+#else
+
+#define ve_log_wait		log_wait
+#define ve_log_start		log_start
+#define ve_log_end		log_end
+#define ve_logged_chars		logged_chars
+#define ve_log_buf		log_buf
+#define ve_log_buf_len		log_buf_len
+#define VE_LOG_BUF_MASK		LOG_BUF_MASK
+#define VE_LOG_BUF(idx)		LOG_BUF(idx)
+
+#endif /* CONFIG_VE */
+#endif /* __VE_PRINTK_H__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/virtinfo.h linux-2.6.9-ve023stab054/include/linux/virtinfo.h
--- linux-2.6.9-100.orig/include/linux/virtinfo.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/virtinfo.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,86 @@
+/*
+ *  include/linux/virtinfo.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __LINUX_VIRTINFO_H
+#define __LINUX_VIRTINFO_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/page-flags.h>
+#include <linux/notifier.h>
+
+struct vnotifier_block
+{
+	int (*notifier_call)(struct vnotifier_block *self,
+			unsigned long, void *, int);
+	struct vnotifier_block *next;
+	int priority;
+};
+
+extern struct semaphore virtinfo_sem;
+void __virtinfo_notifier_register(int type, struct vnotifier_block *nb);
+void virtinfo_notifier_register(int type, struct vnotifier_block *nb);
+void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb);
+int virtinfo_notifier_call(int type, unsigned long n, void *data);
+
+struct meminfo {
+	struct sysinfo si;
+	unsigned long active, inactive;
+	unsigned long cache, swapcache;
+	unsigned long committed_space, allowed;
+	struct page_state ps;
+	unsigned long vmalloc_total, vmalloc_used, vmalloc_largest;
+};
+
+#define VIRTINFO_DOFORK		0
+#define VIRTINFO_DOEXIT		1
+#define VIRTINFO_DOEXECVE	2
+#define VIRTINFO_DOFORKRET	3
+#define VIRTINFO_DOFORKPOST	4
+#define VIRTINFO_EXIT		5
+#define VIRTINFO_EXITMMAP	6
+#define VIRTINFO_EXECMMAP	7
+#define VIRTINFO_ENOUGHMEM	8
+#define VIRTINFO_OUTOFMEM	9
+#define VIRTINFO_PAGEIN		10
+#define VIRTINFO_MEMINFO	11
+#define VIRTINFO_SYSINFO	12
+#define VIRTINFO_NEWUBC		13
+
+enum virt_info_types {
+	VITYPE_GENERAL,
+	VITYPE_FAUDIT,
+	VITYPE_QUOTA,
+	VITYPE_SCP,
+
+	VIRT_TYPES
+};
+
+#ifdef CONFIG_VZ_GENCALLS
+
+static inline int virtinfo_gencall(unsigned long n, void *data)
+{
+	int r;
+
+	r = virtinfo_notifier_call(VITYPE_GENERAL, n, data);
+	if (r & NOTIFY_FAIL)
+		return -ENOBUFS;
+	if (r & NOTIFY_OK)
+		return -ERESTARTNOINTR;
+	return 0;
+}
+
+#else
+
+#define virtinfo_gencall(n, data)	0
+
+#endif
+
+#endif /* __LINUX_VIRTINFO_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/virtinfoscp.h linux-2.6.9-ve023stab054/include/linux/virtinfoscp.h
--- linux-2.6.9-100.orig/include/linux/virtinfoscp.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/virtinfoscp.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,23 @@
+#ifndef __VIRTINFO_SCP_H__
+#define __VIRTINFO_SCP_H__
+
+/*
+ * Dump and restore operations are non-symmetric.
+ * With respect to finish/fail hooks, 2 dump hooks are called from
+ * different proc operations, but restore hooks are called from a single one.
+ */
+#define VIRTINFO_SCP_COLLECT	0x10
+#define VIRTINFO_SCP_DUMP	0x11
+#define VIRTINFO_SCP_DMPFIN	0x12
+#define VIRTINFO_SCP_RSTCHECK	0x13
+#define VIRTINFO_SCP_RESTORE	0x14
+#define VIRTINFO_SCP_RSTFAIL	0x15
+
+#define VIRTINFO_SCP_RSTTSK	0x20
+#define VIRTINFO_SCP_RSTMM	0x21
+
+#define VIRTINFO_SCP_TEST	0x30
+
+#define VIRTNOTIFY_CHANGE	0x100
+
+#endif /* __VIRTINFO_SCP_H__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/vmalloc.h linux-2.6.9-ve023stab054/include/linux/vmalloc.h
--- linux-2.6.9-100.orig/include/linux/vmalloc.h	2011-06-09 19:22:37.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/vmalloc.h	2011-06-15 19:26:19.000000000 +0400
@@ -10,6 +10,10 @@
 #define VM_MAP		0x00000004	/* vmap()ed pages */
 /* bits [20..32] reserved for arch specific ioremap internals */
 
+/* align size to 2^n page boundary */
+#define POWER2_PAGE_ALIGN(size) \
+	((typeof(size))(1UL << (PAGE_SHIFT + get_order(size))))
+
 struct vm_struct {
 	void			*addr;
 	unsigned long		size;
@@ -27,6 +31,8 @@ extern void *vmalloc(unsigned long size)
 extern void *vmalloc_exec(unsigned long size);
 extern void *vmalloc_32(unsigned long size);
 extern void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot);
+extern void *vmalloc_best(unsigned long size);
+extern void *ub_vmalloc_best(unsigned long size);
 extern void vfree(void *addr);
 
 extern void *vmap(struct page **pages, unsigned int count,
@@ -39,6 +45,9 @@ extern void vunmap(void *addr);
 extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
 extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 					unsigned long start, unsigned long end);
+extern struct vm_struct * get_vm_area_best(unsigned long size,
+					   unsigned long flags);
+extern void vprintstat(void);
 extern struct vm_struct *remove_vm_area(void *addr);
 extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
 			struct page ***pages);
diff -Nurap linux-2.6.9-100.orig/include/linux/vsched.h linux-2.6.9-ve023stab054/include/linux/vsched.h
--- linux-2.6.9-100.orig/include/linux/vsched.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/vsched.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,35 @@
+/*
+ *  include/linux/vsched.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VSCHED_H__
+#define __VSCHED_H__
+
+#include <linux/config.h>
+#include <linux/cache.h>
+#include <linux/fairsched.h>
+#include <linux/sched.h>
+
+extern int vsched_create(int id, struct fairsched_node *node);
+extern int vsched_destroy(struct vcpu_scheduler *vsched);
+
+extern int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched);
+extern int vsched_set_vcpus(struct vcpu_scheduler *vsched, unsigned int vcpus);
+
+extern int vcpu_online(int cpu);
+
+#ifdef CONFIG_VE
+#ifdef CONFIG_FAIRSCHED
+extern unsigned long ve_scale_khz(unsigned long khz);
+#else
+#define ve_scale_khz(khz)	(khz)
+#endif
+#endif
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/linux/vzcalluser.h linux-2.6.9-ve023stab054/include/linux/vzcalluser.h
--- linux-2.6.9-100.orig/include/linux/vzcalluser.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/vzcalluser.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,230 @@
+/*
+ *  include/linux/vzcalluser.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_VZCALLUSER_H
+#define _LINUX_VZCALLUSER_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define KERN_VZ_PRIV_RANGE 51
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+/*
+ * VE management ioctls
+ */
+
+struct vzctl_old_env_create {
+	envid_t veid;
+	unsigned flags;
+#define VE_CREATE 	1	/* Create VE, VE_ENTER added automatically */
+#define VE_EXCLUSIVE	2	/* Fail if exists */
+#define VE_ENTER	4	/* Enter existing VE */
+#define VE_TEST		8	/* Test if VE exists */
+#define VE_LOCK		16	/* Do not allow entering created VE */
+#define VE_SKIPLOCK	32	/* Allow entering embrion VE */
+	__u32 addr;
+};
+
+struct vzctl_mark_env_to_down {
+	envid_t veid;
+};
+
+struct vzctl_setdevperms {
+	envid_t veid;
+	unsigned type;
+#define VE_USE_MAJOR	010	/* Test MAJOR supplied in rule */
+#define VE_USE_MINOR	030	/* Test MINOR supplied in rule */
+#define VE_USE_MASK	030	/* Testing mask, VE_USE_MAJOR|VE_USE_MINOR */
+	unsigned dev;
+	unsigned mask;
+};
+
+struct vzctl_ve_netdev {
+	envid_t veid;
+	int op;
+#define VE_NETDEV_ADD  1
+#define VE_NETDEV_DEL  2
+	char *dev_name;
+};
+
+struct vzctl_ve_meminfo {
+	envid_t veid;
+	unsigned long val;
+};
+
+/* these masks represent modules */
+#define VE_IP_IPTABLES_MOD		(1U<<0)
+#define VE_IP_FILTER_MOD		(1U<<1)
+#define VE_IP_MANGLE_MOD		(1U<<2)
+#define VE_IP_MATCH_LIMIT_MOD		(1U<<3)
+#define VE_IP_MATCH_MULTIPORT_MOD	(1U<<4)
+#define VE_IP_MATCH_TOS_MOD		(1U<<5)
+#define VE_IP_TARGET_TOS_MOD		(1U<<6)
+#define VE_IP_TARGET_REJECT_MOD		(1U<<7)
+#define VE_IP_TARGET_TCPMSS_MOD		(1U<<8)
+#define VE_IP_MATCH_TCPMSS_MOD		(1U<<9)
+#define VE_IP_MATCH_TTL_MOD		(1U<<10)
+#define VE_IP_TARGET_LOG_MOD		(1U<<11)
+#define VE_IP_MATCH_LENGTH_MOD		(1U<<12)
+#define VE_IP_CONNTRACK_MOD		(1U<<14)
+#define VE_IP_CONNTRACK_FTP_MOD		(1U<<15)
+#define VE_IP_CONNTRACK_IRC_MOD		(1U<<16)
+#define VE_IP_MATCH_CONNTRACK_MOD	(1U<<17)
+#define VE_IP_MATCH_STATE_MOD		(1U<<18)
+#define VE_IP_MATCH_HELPER_MOD		(1U<<19)
+#define VE_IP_NAT_MOD			(1U<<20)
+#define VE_IP_NAT_FTP_MOD		(1U<<21)
+#define VE_IP_NAT_IRC_MOD		(1U<<22)
+#define VE_IP_TARGET_REDIRECT_MOD	(1U<<23)
+#define VE_IP_MATCH_OWNER_MOD		(1U<<24)
+
+/* these masks represent modules with their dependences */
+#define VE_IP_IPTABLES		(VE_IP_IPTABLES_MOD)
+#define VE_IP_FILTER		(VE_IP_FILTER_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_MANGLE		(VE_IP_MANGLE_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_MATCH_LIMIT	(VE_IP_MATCH_LIMIT_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_MATCH_MULTIPORT	(VE_IP_MATCH_MULTIPORT_MOD	\
+					| VE_IP_IPTABLES)
+#define VE_IP_MATCH_TOS		(VE_IP_MATCH_TOS_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_TARGET_TOS	(VE_IP_TARGET_TOS_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_TARGET_REJECT	(VE_IP_TARGET_REJECT_MOD	\
+					| VE_IP_IPTABLES)
+#define VE_IP_TARGET_TCPMSS	(VE_IP_TARGET_TCPMSS_MOD	\
+					| VE_IP_IPTABLES)
+#define VE_IP_MATCH_TCPMSS	(VE_IP_MATCH_TCPMSS_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_MATCH_TTL		(VE_IP_MATCH_TTL_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_TARGET_LOG	(VE_IP_TARGET_LOG_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_MATCH_LENGTH	(VE_IP_MATCH_LENGTH_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_CONNTRACK		(VE_IP_CONNTRACK_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_CONNTRACK_FTP	(VE_IP_CONNTRACK_FTP_MOD	\
+					| VE_IP_CONNTRACK)
+#define VE_IP_CONNTRACK_IRC	(VE_IP_CONNTRACK_IRC_MOD	\
+					| VE_IP_CONNTRACK)
+#define VE_IP_MATCH_CONNTRACK	(VE_IP_MATCH_CONNTRACK_MOD	\
+					| VE_IP_CONNTRACK)
+#define VE_IP_MATCH_STATE	(VE_IP_MATCH_STATE_MOD		\
+					| VE_IP_CONNTRACK)
+#define VE_IP_MATCH_HELPER	(VE_IP_MATCH_HELPER_MOD		\
+					| VE_IP_CONNTRACK)
+#define VE_IP_NAT		(VE_IP_NAT_MOD			\
+					| VE_IP_CONNTRACK)
+#define VE_IP_NAT_FTP		(VE_IP_NAT_FTP_MOD		\
+					| VE_IP_NAT | VE_IP_CONNTRACK_FTP)
+#define VE_IP_NAT_IRC		(VE_IP_NAT_IRC_MOD		\
+					| VE_IP_NAT | VE_IP_CONNTRACK_IRC)
+#define VE_IP_TARGET_REDIRECT	(VE_IP_TARGET_REDIRECT_MOD	\
+					| VE_IP_NAT)
+#define VE_IP_MATCH_OWNER	(VE_IP_MATCH_OWNER_MOD	\
+					| VE_IP_IPTABLES)
+
+/* safe iptables mask to be used by default */
+#define VE_IP_DEFAULT					\
+	(VE_IP_IPTABLES |				\
+	VE_IP_FILTER | VE_IP_MANGLE |			\
+	VE_IP_MATCH_LIMIT | VE_IP_MATCH_MULTIPORT |	\
+	VE_IP_MATCH_TOS | VE_IP_TARGET_REJECT | 	\
+	VE_IP_TARGET_TCPMSS | VE_IP_MATCH_TCPMSS |	\
+	VE_IP_MATCH_TTL | VE_IP_MATCH_LENGTH)
+
+#define VE_IPT_CMP(x,y)		(((x) & (y)) == (y))
+
+struct vzctl_env_create_cid {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+};
+
+struct vzctl_env_create {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+};
+
+struct env_create_param {
+	__u64 iptables_mask;
+};
+#define VZCTL_ENV_CREATE_DATA_MINLEN	sizeof(struct env_create_param)
+
+struct env_create_param2 {
+	__u64 iptables_mask;
+	__u64 feature_mask;
+#define VE_FEATURE_SYSFS	(1ULL << 0)
+	__u32 total_vcpus;	/* 0 - don't care, same as in host */
+};
+#define VZCTL_ENV_CREATE_DATA_MAXLEN	sizeof(struct env_create_param2)
+
+typedef struct env_create_param2 env_create_param_t;
+
+struct vzctl_env_create_data {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+	env_create_param_t *data;
+	int datalen;
+};
+
+struct vz_load_avg {
+	int val_int;
+	int val_frac;
+};
+
+struct vz_cpu_stat {
+	unsigned long user_jif;
+	unsigned long nice_jif;
+	unsigned long system_jif; 
+	unsigned long uptime_jif;
+	__u64 idle_clk;
+	__u64 strv_clk;
+	__u64 uptime_clk;
+	struct vz_load_avg avenrun[3];	/* loadavg data */
+};
+
+struct vzctl_cpustatctl {
+	envid_t veid;
+	struct vz_cpu_stat *cpustat;
+};
+
+#define VZCTLTYPE '.'
+#define VZCTL_OLD_ENV_CREATE	_IOW(VZCTLTYPE, 0,			\
+					struct vzctl_old_env_create)
+#define VZCTL_MARK_ENV_TO_DOWN	_IOW(VZCTLTYPE, 1,			\
+					struct vzctl_mark_env_to_down)
+#define VZCTL_SETDEVPERMS	_IOW(VZCTLTYPE, 2,			\
+					struct vzctl_setdevperms)
+#define VZCTL_ENV_CREATE_CID	_IOW(VZCTLTYPE, 4,			\
+					struct vzctl_env_create_cid)
+#define VZCTL_ENV_CREATE	_IOW(VZCTLTYPE, 5,			\
+					struct vzctl_env_create)
+#define VZCTL_GET_CPU_STAT	_IOW(VZCTLTYPE, 6,			\
+					struct vzctl_cpustatctl)
+#define VZCTL_ENV_CREATE_DATA	_IOW(VZCTLTYPE, 10,			\
+					struct vzctl_env_create_data)
+#define VZCTL_VE_NETDEV		_IOW(VZCTLTYPE, 11,			\
+					struct vzctl_ve_netdev)
+#define VZCTL_VE_MEMINFO	_IOW(VZCTLTYPE, 13,                     \
+					struct vzctl_ve_meminfo)
+
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/linux/vzctl.h linux-2.6.9-ve023stab054/include/linux/vzctl.h
--- linux-2.6.9-100.orig/include/linux/vzctl.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/vzctl.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,30 @@
+/*
+ *  include/linux/vzctl.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_VZCTL_H
+#define _LINUX_VZCTL_H
+
+#include <linux/list.h>
+
+struct module;
+struct inode;
+struct file;
+struct vzioctlinfo {
+	unsigned type;
+	int (*func)(struct inode *, struct file *,
+			unsigned int, unsigned long);
+	struct module *owner;
+	struct list_head list;
+};
+
+extern void vzioctl_register(struct vzioctlinfo *inf);
+extern void vzioctl_unregister(struct vzioctlinfo *inf);
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/linux/vzctl_quota.h linux-2.6.9-ve023stab054/include/linux/vzctl_quota.h
--- linux-2.6.9-100.orig/include/linux/vzctl_quota.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/vzctl_quota.h	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,43 @@
+/*
+ *  include/linux/vzctl_quota.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __LINUX_VZCTL_QUOTA_H__
+#define __LINUX_VZCTL_QUOTA_H__
+
+/*
+ * Quota management ioctl
+ */
+
+struct vz_quota_stat;
+struct vzctl_quotactl {
+	int cmd;
+	unsigned int quota_id;
+	struct vz_quota_stat *qstat;
+	char *ve_root;
+};
+
+struct vzctl_quotaugidctl {
+	int cmd;		/* subcommand */
+	unsigned int quota_id;	/* quota id where it applies to */
+	unsigned int	ugid_index;/* for reading statistics: index of first
+				    uid/gid record to read */
+	unsigned int ugid_size;	/* size of ugid_buf array */
+	void *addr; 		/* user-level buffer */
+};
+
+#define VZDQCTLTYPE '+'
+#define VZCTL_QUOTA_CTL		_IOWR(VZDQCTLTYPE, 1,			\
+					struct vzctl_quotactl)
+#define VZCTL_QUOTA_NEW_CTL	_IOWR(VZDQCTLTYPE, 2,			\
+					struct vzctl_quotactl)
+#define VZCTL_QUOTA_UGID_CTL	_IOWR(VZDQCTLTYPE, 3,			\
+					struct vzctl_quotaugidctl)
+
+#endif /* __LINUX_VZCTL_QUOTA_H__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/vzctl_venet.h linux-2.6.9-ve023stab054/include/linux/vzctl_venet.h
--- linux-2.6.9-100.orig/include/linux/vzctl_venet.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/vzctl_venet.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,36 @@
+/*
+ *  include/linux/vzctl_venet.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VZCTL_VENET_H
+#define _VZCTL_VENET_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+struct vzctl_ve_ip_map {
+	envid_t veid;
+	int op;
+#define VE_IP_ADD	1
+#define VE_IP_DEL	2
+	struct sockaddr *addr;
+	int addrlen;
+};
+
+#define VENETCTLTYPE '('
+
+#define VENETCTL_VE_IP_MAP	_IOW(VENETCTLTYPE, 3,			\
+					struct vzctl_ve_ip_map)
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/linux/vzctl_veth.h linux-2.6.9-ve023stab054/include/linux/vzctl_veth.h
--- linux-2.6.9-100.orig/include/linux/vzctl_veth.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/vzctl_veth.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,40 @@
+/*
+ *  include/linux/vzctl_veth.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VZCTL_VETH_H
+#define _VZCTL_VETH_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+struct vzctl_ve_hwaddr {
+	envid_t veid;
+	int op;
+#define VE_ETH_ADD	1
+#define VE_ETH_DEL	2
+	unsigned char	dev_addr[6];
+	int addrlen;
+	char		dev_name[16];
+	unsigned char	dev_addr_ve[6];
+	int addrlen_ve;
+	char		dev_name_ve[16];
+};
+
+#define VETHCTLTYPE '['
+
+#define VETHCTL_VE_HWADDR	_IOW(VETHCTLTYPE, 3,			\
+					struct vzctl_ve_hwaddr)
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/linux/vzdq_tree.h linux-2.6.9-ve023stab054/include/linux/vzdq_tree.h
--- linux-2.6.9-100.orig/include/linux/vzdq_tree.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/vzdq_tree.h	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,99 @@
+/*
+ *
+ * Copyright (C) 2005 SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo disk quota tree definition
+ */
+
+#ifndef _VZDQ_TREE_H
+#define _VZDQ_TREE_H
+
+#include <linux/list.h>
+#include <asm/string.h>
+
+typedef unsigned int quotaid_t;
+#define QUOTAID_BITS		32
+#define QUOTAID_BBITS		4
+#define QUOTAID_EBITS		8
+
+#if QUOTAID_EBITS % QUOTAID_BBITS
+#error Quota bit assumption failure
+#endif
+
+#define QUOTATREE_BSIZE		(1 << QUOTAID_BBITS)
+#define QUOTATREE_BMASK		(QUOTATREE_BSIZE - 1)
+#define QUOTATREE_DEPTH		((QUOTAID_BITS + QUOTAID_BBITS - 1) \
+							/ QUOTAID_BBITS)
+#define QUOTATREE_EDEPTH	((QUOTAID_BITS + QUOTAID_EBITS - 1) \
+							/ QUOTAID_EBITS)
+#define QUOTATREE_BSHIFT(lvl)	((QUOTATREE_DEPTH - (lvl) - 1) * QUOTAID_BBITS)
+
+/*
+ * Depth of keeping unused node (not inclusive).
+ * 0 means release all nodes including root,
+ * QUOTATREE_DEPTH means never release nodes.
+ * Current value: release all nodes strictly after QUOTATREE_EDEPTH 
+ * (measured in external shift units).
+ */
+#define QUOTATREE_CDEPTH	(QUOTATREE_DEPTH \
+				- 2 * QUOTATREE_DEPTH / QUOTATREE_EDEPTH \
+				+ 1)
+
+/*
+ * Levels 0..(QUOTATREE_DEPTH-1) are tree nodes.
+ * On level i the maximal number of nodes is 2^(i*QUOTAID_BBITS),
+ * and each node contains 2^QUOTAID_BBITS pointers.
+ * Level 0 is a (single) tree root node.
+ *
+ * Nodes of level (QUOTATREE_DEPTH-1) contain pointers to caller's data.
+ * Nodes of lower levels contain pointers to nodes.
+ *
+ * Double pointer in array of i-level node, pointing to a (i+1)-level node
+ * (such as inside quotatree_find_state) are marked by level (i+1), not i.
+ * Level 0 double pointer is a pointer to root inside tree struct.
+ *
+ * The tree is permanent, i.e. all index blocks allocated are kept alive to
+ * preserve the block numbers in the quota file tree to keep its changes
+ * locally.
+ */
+struct quotatree_node {
+	struct list_head list;
+	quotaid_t num;
+	void *blocks[QUOTATREE_BSIZE];
+};
+
+struct quotatree_level {
+	struct list_head usedlh, freelh;
+	quotaid_t freenum;
+};
+
+struct quotatree_tree {
+	struct quotatree_level levels[QUOTATREE_DEPTH];
+	struct quotatree_node *root;
+	unsigned int leaf_num;
+};
+
+struct quotatree_find_state {
+	void **block;
+	int level;
+};
+
+/* number of leafs (objects) and leaf level of the tree */
+#define QTREE_LEAFNUM(tree)	((tree)->leaf_num)
+#define QTREE_LEAFLVL(tree)	(&(tree)->levels[QUOTATREE_DEPTH - 1])
+
+struct quotatree_tree *quotatree_alloc(void);
+void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st);
+int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st, void *data);
+void quotatree_remove(struct quotatree_tree *tree, quotaid_t id);
+void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *));
+void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id);
+void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index);
+
+#endif /* _VZDQ_TREE_H */
+
diff -Nurap linux-2.6.9-100.orig/include/linux/vzevent.h linux-2.6.9-ve023stab054/include/linux/vzevent.h
--- linux-2.6.9-100.orig/include/linux/vzevent.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/vzevent.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,13 @@
+#ifndef __LINUX_VZ_EVENT_H__
+#define __LINUX_VZ_EVENT_H__
+
+#if defined(CONFIG_VZ_EVENT) || defined(CONFIG_VZ_EVENT_MODULE)
+extern int vzevent_send(int msg, const char *attrs_fmt, ...);
+#else
+static inline int vzevent_send(int msg, const char *attrs_fmt, ...)
+{
+	return 0;
+}
+#endif
+
+#endif /* __LINUX_VZ_EVENT_H__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/vzquota.h linux-2.6.9-ve023stab054/include/linux/vzquota.h
--- linux-2.6.9-100.orig/include/linux/vzquota.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/vzquota.h	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,294 @@
+/*
+ *
+ * Copyright (C) 2001-2005 SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo disk quota implementation
+ */
+
+#ifndef _VZDQUOTA_H
+#define _VZDQUOTA_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/* vzquotactl syscall commands */
+#define VZ_DQ_CREATE		5 /* create quota master block */
+#define VZ_DQ_DESTROY		6 /* destroy qmblk */
+#define VZ_DQ_ON		7 /* mark dentry with already created qmblk */
+#define VZ_DQ_OFF		8 /* remove mark, don't destroy qmblk */
+#define VZ_DQ_SETLIMIT		9 /* set new limits */
+#define VZ_DQ_GETSTAT		10 /* get usage statistic */
+/* set of syscalls to maintain UGID quotas */
+#define VZ_DQ_UGID_GETSTAT	1 /* get usage/limits for ugid(s) */
+#define VZ_DQ_UGID_ADDSTAT	2 /* set usage/limits statistic for ugid(s) */
+#define VZ_DQ_UGID_GETGRACE	3 /* get expire times */
+#define VZ_DQ_UGID_SETGRACE	4 /* set expire times */
+#define VZ_DQ_UGID_GETCONFIG	5 /* get ugid_max limit, cnt, flags of qmblk */
+#define VZ_DQ_UGID_SETCONFIG	6 /* set ugid_max limit, flags of qmblk */
+#define VZ_DQ_UGID_SETLIMIT	7 /* set ugid B/I limits */
+#define VZ_DQ_UGID_SETINFO	8 /* set ugid info */
+
+/* common structure for vz and ugid quota */
+struct dq_stat {
+	/* blocks limits */
+	__u64	bhardlimit;	/* absolute limit in bytes */
+	__u64	bsoftlimit;	/* preferred limit in bytes */
+	time_t	btime;		/* time limit for excessive disk use */
+	__u64	bcurrent;	/* current bytes count */
+	/* inodes limits */
+	__u32	ihardlimit;	/* absolute limit on allocated inodes */
+	__u32	isoftlimit;	/* preferred inode limit */
+	time_t	itime;		/* time limit for excessive inode use */
+	__u32	icurrent;	/* current # allocated inodes */
+};
+
+/* Values for dq_info->flags */
+#define VZ_QUOTA_INODES 0x01       /* inodes limit warning printed */
+#define VZ_QUOTA_SPACE  0x02       /* space limit warning printed */
+
+struct dq_info {
+	time_t		bexpire;   /* expire timeout for excessive disk use */
+	time_t		iexpire;   /* expire timeout for excessive inode use */
+	unsigned	flags;	   /* see previous defines */
+};
+
+struct vz_quota_stat  {
+	struct dq_stat dq_stat;
+	struct dq_info dq_info;
+};
+
+/* UID/GID interface record - for user-kernel level exchange */
+struct vz_quota_iface {
+	unsigned int	qi_id;	   /* UID/GID this applies to */
+	unsigned int	qi_type;   /* USRQUOTA|GRPQUOTA */
+	struct dq_stat	qi_stat;   /* limits, options, usage stats */
+};
+
+/* values for flags and dq_flags */
+/* this flag is set if the userspace has been unable to provide usage
+ * information about all ugids
+ * if the flag is set, we don't allocate new UG quota blocks (their
+ * current usage is unknown) or free existing UG quota blocks (not to
+ * lose information that this block is ok) */
+#define VZDQUG_FIXED_SET	0x01
+/* permit to use ugid quota */
+#define VZDQUG_ON		0x02
+#define VZDQ_USRQUOTA		0x10
+#define VZDQ_GRPQUOTA		0x20
+#define VZDQ_NOACT		0x1000	/* not actual */
+#define VZDQ_NOQUOT		0x2000	/* not under quota tree */
+
+struct vz_quota_ugid_stat {
+	unsigned int	limit;	/* max amount of ugid records */
+	unsigned int	count;	/* amount of ugid records */
+	unsigned int	flags;	
+};
+
+struct vz_quota_ugid_setlimit {
+	unsigned int	type;	/* quota type (USR/GRP) */
+	unsigned int	id;	/* ugid */
+	struct if_dqblk dqb;	/* limits info */
+};
+
+struct vz_quota_ugid_setinfo {
+	unsigned int	type;	/* quota type (USR/GRP) */
+	struct if_dqinfo dqi;	/* grace info */
+};
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+#include <linux/time.h>
+#include <linux/vzquota_qlnk.h>
+#include <linux/vzdq_tree.h>
+
+/* One-second resolution for grace times */
+#define CURRENT_TIME_SECONDS	(get_seconds())
+
+/* Values for dq_info flags */
+#define VZ_QUOTA_INODES	0x01	   /* inodes limit warning printed */
+#define VZ_QUOTA_SPACE	0x02	   /* space limit warning printed */
+
+/* values for dq_state */
+#define VZDQ_STARTING		0 /* created, not turned on yet */
+#define VZDQ_WORKING		1 /* quota created, turned on */
+#define VZDQ_STOPING		2 /* created, turned on and off */
+
+/* master quota record - one per veid */
+struct vz_quota_master {
+	struct list_head	dq_hash;	/* next quota in hash list */
+	atomic_t		dq_count;	/* inode reference count */
+	unsigned int		dq_flags;	/* see VZDQUG_FIXED_SET */
+	unsigned int		dq_state;	/* see values above */
+	unsigned int		dq_id;		/* VEID this applies to */
+	struct dq_stat		dq_stat; 	/* limits, grace, usage stats */
+	struct dq_info		dq_info;	/* grace times and flags */
+	spinlock_t		dq_data_lock;	/* for dq_stat */
+
+	struct semaphore	dq_sem;		/* semaphore to protect 
+						   ugid tree */
+
+	struct list_head	dq_ilink_list;	/* list of vz_quota_ilink */
+	struct quotatree_tree	*dq_uid_tree;	/* vz_quota_ugid tree for UIDs */
+	struct quotatree_tree	*dq_gid_tree;	/* vz_quota_ugid tree for GIDs */
+	unsigned int		dq_ugid_count;	/* amount of ugid records */
+	unsigned int		dq_ugid_max;	/* max amount of ugid records */
+	struct dq_info		dq_ugid_info[MAXQUOTAS]; /* ugid grace times */
+
+	struct dentry		*dq_root_dentry;/* dentry of fs tree */
+	struct vfsmount		*dq_root_mnt;	/* vfsmnt of this dentry */
+	struct super_block	*dq_sb;	      /* superblock of our quota root */
+};
+
+/* UID/GID quota record - one per pair (quota_master, uid or gid) */
+struct vz_quota_ugid {
+	unsigned int		qugid_id;     /* UID/GID this applies to */
+	struct dq_stat		qugid_stat;   /* limits, options, usage stats */
+	int			qugid_type;   /* USRQUOTA|GRPQUOTA */
+	atomic_t		qugid_count;  /* reference count */
+};
+
+#define VZ_QUOTA_UGBAD		((struct vz_quota_ugid *)0xfeafea11)
+
+struct vz_quota_datast {
+	struct vz_quota_ilink qlnk;
+};
+
+#define VIRTINFO_QUOTA_GETSTAT	0
+#define VIRTINFO_QUOTA_ON	1
+#define VIRTINFO_QUOTA_OFF	2
+#define VIRTINFO_QUOTA_DISABLE	3
+
+struct virt_info_quota {
+	struct super_block *super;
+	struct dq_stat *qstat;
+};
+
+/*
+ * Interface to VZ quota core
+ */
+#define INODE_QLNK(inode)	(&(inode)->i_qlnk)
+#define QLNK_INODE(qlnk)	container_of((qlnk), struct inode, i_qlnk)
+
+#define VZ_QUOTA_BAD		((struct vz_quota_master *)0xefefefef)
+
+#define VZ_QUOTAO_SETE		1
+#define VZ_QUOTAO_INIT		2
+#define VZ_QUOTAO_DESTR		3
+#define VZ_QUOTAO_SWAP		4
+#define VZ_QUOTAO_INICAL	5
+#define VZ_QUOTAO_DRCAL		6
+#define VZ_QUOTAO_QSET		7
+#define VZ_QUOTAO_TRANS		8
+#define VZ_QUOTAO_ACT		9
+#define VZ_QUOTAO_DTREE		10
+#define VZ_QUOTAO_DET		11
+#define VZ_QUOTAO_ON		12
+#define VZ_QUOTAO_RE_LOCK	13
+
+extern struct semaphore vz_quota_sem;
+void inode_qmblk_lock(struct super_block *sb);
+void inode_qmblk_unlock(struct super_block *sb);
+void qmblk_data_read_lock(struct vz_quota_master *qmblk);
+void qmblk_data_read_unlock(struct vz_quota_master *qmblk);
+void qmblk_data_write_lock(struct vz_quota_master *qmblk);
+void qmblk_data_write_unlock(struct vz_quota_master *qmblk);
+
+/* for quota operations */
+void vzquota_inode_init_call(struct inode *inode);
+void vzquota_inode_drop_call(struct inode *inode);
+int vzquota_inode_transfer_call(struct inode *, struct iattr *);
+struct vz_quota_master *vzquota_inode_data(struct inode *inode,
+		struct vz_quota_datast *);
+void vzquota_data_unlock(struct inode *inode, struct vz_quota_datast *);
+int vzquota_rename_check(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir);
+struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode);
+/* for second-level quota */
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
+/* for management operations */
+struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
+		struct vz_quota_stat *qstat);
+void vzquota_free_master(struct vz_quota_master *);
+struct vz_quota_master *vzquota_find_master(unsigned int quota_id);
+int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
+		struct vz_quota_master *qmblk);
+int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk);
+int vzquota_get_super(struct super_block *sb);
+void vzquota_put_super(struct super_block *sb);
+
+static inline struct vz_quota_master *qmblk_get(struct vz_quota_master *qmblk)
+{
+	if (!atomic_read(&qmblk->dq_count))
+		BUG();
+	atomic_inc(&qmblk->dq_count);
+	return qmblk;
+}
+
+static inline void __qmblk_put(struct vz_quota_master *qmblk)
+{
+	atomic_dec(&qmblk->dq_count);
+}
+
+static inline void qmblk_put(struct vz_quota_master *qmblk)
+{
+	if (!atomic_dec_and_test(&qmblk->dq_count))
+		return;
+	vzquota_free_master(qmblk);
+}
+
+extern struct list_head vzquota_hash_table[];
+extern int vzquota_hash_size;
+
+/*
+ * Interface to VZ UGID quota
+ */
+extern struct quotactl_ops vz_quotactl_operations;
+extern struct dquot_operations vz_quota_operations2;
+extern struct quota_format_type vz_quota_empty_v2_format;
+
+#define QUGID_TREE(qmblk, type)	(((type) == USRQUOTA) ?		\
+					qmblk->dq_uid_tree :	\
+					qmblk->dq_gid_tree)
+
+#define VZDQUG_FIND_DONT_ALLOC	1
+#define VZDQUG_FIND_FAKE	2
+struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
+		unsigned int quota_id, int type, int flags);
+struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
+		unsigned int quota_id, int type, int flags);
+struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid);
+void vzquota_put_ugid(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid);
+void vzquota_kill_ugid(struct vz_quota_master *qmblk);
+int vzquota_ugid_init(void);
+void vzquota_ugid_release(void);
+int vzquota_transfer_usage(struct inode *inode, int mask,
+		struct vz_quota_ilink *qlnk);
+void vzquota_inode_off(struct inode *inode);
+
+struct vzctl_quotaugidctl;
+long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub);
+
+/*
+ * Other VZ quota parts
+ */
+extern struct dquot_operations vz_quota_operations;
+
+long do_vzquotactl(int cmd, unsigned int quota_id,
+			  struct vz_quota_stat *qstat, const char *ve_root);
+int vzquota_proc_init(void);
+void vzquota_proc_release(void);
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
+extern struct semaphore vz_quota_sem;
+
+void vzaquota_init(void);
+void vzaquota_fini(void);
+
+#endif /* __KERNEL__ */
+
+#endif /* _VZDQUOTA_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/vzquota_qlnk.h linux-2.6.9-ve023stab054/include/linux/vzquota_qlnk.h
--- linux-2.6.9-100.orig/include/linux/vzquota_qlnk.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/vzquota_qlnk.h	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,25 @@
+/*
+ *  include/linux/vzquota_qlnk.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VZDQUOTA_QLNK_H
+#define _VZDQUOTA_QLNK_H
+
+struct vz_quota_master;
+struct vz_quota_ugid;
+
+/* inode link, used to track inodes using quota via dq_ilink_list */
+struct vz_quota_ilink {
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid *qugid[MAXQUOTAS];
+	struct list_head list;
+	unsigned char origin[2];
+};
+
+#endif /* _VZDQUOTA_QLNK_H */
diff -Nurap linux-2.6.9-100.orig/include/linux/vzratelimit.h linux-2.6.9-ve023stab054/include/linux/vzratelimit.h
--- linux-2.6.9-100.orig/include/linux/vzratelimit.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/vzratelimit.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,28 @@
+/*
+ *  include/linux/vzratelimit.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VZ_RATELIMIT_H__
+#define __VZ_RATELIMIT_H__
+
+/*
+ * Generic ratelimiting stuff.
+ */
+
+struct vz_rate_info {
+	int burst;
+	int interval; /* jiffy_t per event */
+	int bucket; /* kind of leaky bucket */
+	unsigned long last; /* last event */
+};
+
+/* Return true if rate limit permits. */
+int vz_ratelimit(struct vz_rate_info *p);
+
+#endif /* __VZ_RATELIMIT_H__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/vzstat.h linux-2.6.9-ve023stab054/include/linux/vzstat.h
--- linux-2.6.9-100.orig/include/linux/vzstat.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/linux/vzstat.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,176 @@
+/*
+ *  include/linux/vzstat.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VZSTAT_H__
+#define __VZSTAT_H__
+
+struct swap_cache_info_struct {
+	unsigned long add_total;
+	unsigned long del_total;
+	unsigned long find_success;
+	unsigned long find_total;
+	unsigned long noent_race;
+	unsigned long exist_race;
+	unsigned long remove_race;
+};
+
+struct kstat_lat_snap_struct {
+	cycles_t maxlat, totlat;
+	unsigned long count;
+};
+struct kstat_lat_pcpu_snap_struct {
+	cycles_t maxlat, totlat;
+	unsigned long count;
+	seqcount_t lock;
+} ____cacheline_maxaligned_in_smp;
+
+struct kstat_lat_struct {
+	struct kstat_lat_snap_struct cur, last;
+	cycles_t avg[3];
+};
+struct kstat_lat_pcpu_struct {
+	struct kstat_lat_pcpu_snap_struct cur[NR_CPUS];
+	cycles_t max_snap;
+	struct kstat_lat_snap_struct last;
+	cycles_t avg[3];
+};
+
+struct kstat_perf_snap_struct {
+	cycles_t wall_tottime, cpu_tottime;
+	cycles_t wall_maxdur, cpu_maxdur;
+	unsigned long count;
+};
+struct kstat_perf_struct {
+	struct kstat_perf_snap_struct cur, last;
+};
+
+struct kstat_zone_avg {
+	unsigned long		free_pages_avg[3],
+				nr_active_avg[3],
+				nr_inactive_avg[3];
+};
+
+#define KSTAT_ALLOCSTAT_NR 5
+
+struct kernel_stat_glob {
+	unsigned long nr_unint_avg[3];
+
+	unsigned long alloc_fails[KSTAT_ALLOCSTAT_NR];
+	struct kstat_lat_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
+	struct kstat_lat_pcpu_struct sched_lat;
+	struct kstat_lat_struct swap_in;
+
+	struct kstat_perf_struct ttfp, cache_reap,
+			refill_inact, shrink_icache, shrink_dcache;
+
+	struct kstat_zone_avg zone_avg[3];	/* MAX_NR_ZONES */
+} ____cacheline_aligned;
+
+extern struct kernel_stat_glob kstat_glob ____cacheline_aligned;
+extern spinlock_t kstat_glb_lock;
+
+#define KSTAT_PERF_ENTER(name)				\
+	unsigned long flags;				\
+	cycles_t start, sleep_time;			\
+							\
+	start = get_cycles();				\
+	sleep_time = VE_TASK_INFO(current)->sleep_time;	\
+
+#define KSTAT_PERF_LEAVE(name)				\
+	spin_lock_irqsave(&kstat_glb_lock, flags);	\
+	kstat_glob.name.cur.count++;			\
+	start = get_cycles() - start;			\
+	if (kstat_glob.name.cur.wall_maxdur < start)	\
+		kstat_glob.name.cur.wall_maxdur = start;\
+	kstat_glob.name.cur.wall_tottime += start;	\
+	start -= VE_TASK_INFO(current)->sleep_time -	\
+					sleep_time;	\
+	if (kstat_glob.name.cur.cpu_maxdur < start)	\
+		kstat_glob.name.cur.cpu_maxdur = start;	\
+	kstat_glob.name.cur.cpu_tottime += start;	\
+	spin_unlock_irqrestore(&kstat_glb_lock, flags);	\
+
+/*
+ * Add another statistics reading.
+ * Serialization is the caller's due.
+ */
+static inline void KSTAT_LAT_ADD(struct kstat_lat_struct *p,
+		cycles_t dur)
+{
+	p->cur.count++;
+	if (p->cur.maxlat < dur)
+		p->cur.maxlat = dur;
+	p->cur.totlat += dur;
+}
+
+static inline void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, int cpu,
+		cycles_t dur)
+{
+	struct kstat_lat_pcpu_snap_struct *cur;
+
+	cur = &p->cur[cpu];
+	write_seqcount_begin(&cur->lock);
+	cur->count++;
+	if (cur->maxlat < dur)
+		cur->maxlat = dur;
+	cur->totlat += dur;
+	write_seqcount_end(&cur->lock);
+}
+
+/*
+ * Move current statistics to last, clear last.
+ * Serialization is the caller's due.
+ */
+static inline void KSTAT_LAT_UPDATE(struct kstat_lat_struct *p)
+{
+	cycles_t m;
+	memcpy(&p->last, &p->cur, sizeof(p->last));
+	p->cur.maxlat = 0;
+	m = p->last.maxlat;
+	CALC_LOAD(p->avg[0], EXP_1, m)
+	CALC_LOAD(p->avg[1], EXP_5, m)
+	CALC_LOAD(p->avg[2], EXP_15, m)
+}
+
+static inline void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p)
+{
+	unsigned i, cpu;
+	struct kstat_lat_pcpu_snap_struct snap, *cur;
+	cycles_t m;
+
+	memset(&p->last, 0, sizeof(p->last));
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		cur = &p->cur[cpu];
+		do {
+			i = read_seqcount_begin(&cur->lock);
+			memcpy(&snap, cur, sizeof(snap));
+		} while (read_seqcount_retry(&cur->lock, i));
+		/* 
+		 * read above and this update of maxlat is not atomic,
+		 * but this is OK, since it happens rarely and losing
+		 * a couple of peaks is not essential. xemul
+		 */
+		cur->maxlat = 0;
+
+		p->last.count += snap.count;
+		p->last.totlat += snap.totlat;
+		if (p->last.maxlat < snap.maxlat)
+			p->last.maxlat = snap.maxlat;
+	}
+
+	m = (p->last.maxlat > p->max_snap ? p->last.maxlat : p->max_snap);
+	CALC_LOAD(p->avg[0], EXP_1, m);
+	CALC_LOAD(p->avg[1], EXP_5, m);
+	CALC_LOAD(p->avg[2], EXP_15, m);
+	/* reset max_snap to calculate it correctly next time */
+	p->max_snap = 0;
+}
+
+#endif /* __VZSTAT_H__ */
diff -Nurap linux-2.6.9-100.orig/include/linux/zlib.h linux-2.6.9-ve023stab054/include/linux/zlib.h
--- linux-2.6.9-100.orig/include/linux/zlib.h	2004-10-19 01:54:31.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/linux/zlib.h	2011-06-15 19:26:18.000000000 +0400
@@ -506,6 +506,11 @@ extern int zlib_deflateReset (z_streamp 
    stream state was inconsistent (such as zalloc or state being NULL).
 */
 
+static inline unsigned long deflateBound(unsigned long s)
+{
+	return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
+}
+
 extern int zlib_deflateParams (z_streamp strm, int level, int strategy);
 /*
      Dynamically update the compression level and compression strategy.  The
diff -Nurap linux-2.6.9-100.orig/include/net/af_unix.h linux-2.6.9-ve023stab054/include/net/af_unix.h
--- linux-2.6.9-100.orig/include/net/af_unix.h	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/af_unix.h	2011-06-15 19:26:21.000000000 +0400
@@ -3,6 +3,7 @@
 extern void unix_inflight(struct file *fp);
 extern void unix_notinflight(struct file *fp);
 extern void unix_gc(void);
+extern void unix_destruct_fds(struct sk_buff *skb);
 extern void wait_for_unix_gc(void);
 
 #define UNIX_HASH_SIZE	256
@@ -14,23 +15,37 @@ extern atomic_t unix_tot_inflight;
 
 static inline struct sock *first_unix_socket(int *i)
 {
+	struct sock *s;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
-		if (!hlist_empty(&unix_socket_table[*i]))
-			return __sk_head(&unix_socket_table[*i]);
+		for (s = sk_head(&unix_socket_table[*i]);
+		     s != NULL && !ve_accessible(VE_OWNER_SK(s), ve);
+		     s = sk_next(s));
+		if (s != NULL)
+			return s;
 	}
 	return NULL;
 }
 
 static inline struct sock *next_unix_socket(int *i, struct sock *s)
 {
-	struct sock *next = sk_next(s);
-	/* More in this chain? */
-	if (next)
-		return next;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+	for (s = sk_next(s); s != NULL; s = sk_next(s)) {
+		if (!ve_accessible(VE_OWNER_SK(s), ve))
+			continue;
+		return s;
+	}
 	/* Look for next non-empty chain. */
 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
-		if (!hlist_empty(&unix_socket_table[*i]))
-			return __sk_head(&unix_socket_table[*i]);
+		for (s = sk_head(&unix_socket_table[*i]);
+		     s != NULL && !ve_accessible(VE_OWNER_SK(s), ve);
+		     s = sk_next(s));
+		if (s != NULL)
+			return s;
 	}
 	return NULL;
 }
diff -Nurap linux-2.6.9-100.orig/include/net/arp.h linux-2.6.9-ve023stab054/include/net/arp.h
--- linux-2.6.9-100.orig/include/net/arp.h	2004-10-19 01:53:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/arp.h	2011-06-15 19:26:19.000000000 +0400
@@ -7,7 +7,14 @@
 
 #define HAVE_ARP_CREATE
 
-extern struct neigh_table arp_tbl;
+#ifdef CONFIG_VE
+#define arp_tbl		(*(get_exec_env()->ve_arp_tbl))
+extern int ve_arp_init(struct ve_struct *ve);
+extern void ve_arp_fini(struct ve_struct *ve);
+#else
+struct neigh_table	global_arp_tbl;
+#define arp_tbl		global_arp_tbl
+#endif
 
 extern void	arp_init(void);
 extern int	arp_rcv(struct sk_buff *skb, struct net_device *dev,
diff -Nurap linux-2.6.9-100.orig/include/net/compat.h linux-2.6.9-ve023stab054/include/net/compat.h
--- linux-2.6.9-100.orig/include/net/compat.h	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/compat.h	2011-06-15 19:26:22.000000000 +0400
@@ -23,6 +23,12 @@ struct compat_cmsghdr {
 	compat_int_t	cmsg_type;
 };
 
+#if defined(CONFIG_X86_64)
+#define is_current_32bits()	(current_thread_info()->flags & _TIF_IA32)
+#else
+#define is_current_32bits()	0
+#endif
+
 #else /* defined(CONFIG_COMPAT) */
 #define compat_msghdr	msghdr		/* to avoid compiler warnings */
 #endif /* defined(CONFIG_COMPAT) */
diff -Nurap linux-2.6.9-100.orig/include/net/flow.h linux-2.6.9-ve023stab054/include/net/flow.h
--- linux-2.6.9-100.orig/include/net/flow.h	2004-10-19 01:55:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/flow.h	2011-06-15 19:26:19.000000000 +0400
@@ -10,6 +10,7 @@
 #include <linux/in6.h>
 #include <asm/atomic.h>
 
+struct ve_struct;
 struct flowi {
 	int	oif;
 	int	iif;
@@ -77,6 +78,9 @@ struct flowi {
 #define fl_icmp_type	uli_u.icmpt.type
 #define fl_icmp_code	uli_u.icmpt.code
 #define fl_ipsec_spi	uli_u.spi
+#ifdef CONFIG_VE
+	struct ve_struct *owner_env;
+#endif
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 #define FLOW_DIR_IN	0
diff -Nurap linux-2.6.9-100.orig/include/net/icmp.h linux-2.6.9-ve023stab054/include/net/icmp.h
--- linux-2.6.9-100.orig/include/net/icmp.h	2004-10-19 01:54:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/icmp.h	2011-06-15 19:26:19.000000000 +0400
@@ -34,9 +34,14 @@ struct icmp_err {
 
 extern struct icmp_err icmp_err_convert[];
 DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics);
-#define ICMP_INC_STATS(field)		SNMP_INC_STATS(icmp_statistics, field)
-#define ICMP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmp_statistics, field)
-#define ICMP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(icmp_statistics, field)
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_icmp_statistics (get_exec_env()->_icmp_statistics)
+#else
+#define ve_icmp_statistics icmp_statistics
+#endif
+#define ICMP_INC_STATS(field)		SNMP_INC_STATS(ve_icmp_statistics, field)
+#define ICMP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(ve_icmp_statistics, field)
+#define ICMP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_icmp_statistics, field)
 
 extern void	icmp_send(struct sk_buff *skb_in,  int type, int code, u32 info);
 extern int	icmp_rcv(struct sk_buff *skb);
diff -Nurap linux-2.6.9-100.orig/include/net/ip.h linux-2.6.9-ve023stab054/include/net/ip.h
--- linux-2.6.9-100.orig/include/net/ip.h	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/ip.h	2011-06-15 19:26:19.000000000 +0400
@@ -151,15 +151,25 @@ struct ipv4_config
 
 extern struct ipv4_config ipv4_config;
 DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
-#define IP_INC_STATS(field)		SNMP_INC_STATS(ip_statistics, field)
-#define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ip_statistics, field)
-#define IP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ip_statistics, field)
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_ip_statistics (get_exec_env()->_ip_statistics)
+#else
+#define ve_ip_statistics ip_statistics
+#endif
+#define IP_INC_STATS(field)		SNMP_INC_STATS(ve_ip_statistics, field)
+#define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_ip_statistics, field)
+#define IP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_ip_statistics, field)
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
-#define NET_INC_STATS(field)		SNMP_INC_STATS(net_statistics, field)
-#define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(net_statistics, field)
-#define NET_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(net_statistics, field)
-#define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(net_statistics, field, adnd)
-#define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(net_statistics, field, adnd)
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_net_statistics (get_exec_env()->_net_statistics)
+#else
+#define ve_net_statistics net_statistics
+#endif
+#define NET_INC_STATS(field)		SNMP_INC_STATS(ve_net_statistics, field)
+#define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_net_statistics, field)
+#define NET_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_net_statistics, field)
+#define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(ve_net_statistics, field, adnd)
+#define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(ve_net_statistics, field, adnd)
 
 extern int sysctl_local_port_range[2];
 extern int sysctl_ip_default_ttl;
diff -Nurap linux-2.6.9-100.orig/include/net/ip_fib.h linux-2.6.9-ve023stab054/include/net/ip_fib.h
--- linux-2.6.9-100.orig/include/net/ip_fib.h	2004-10-19 01:55:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/ip_fib.h	2011-06-15 19:26:19.000000000 +0400
@@ -138,10 +138,22 @@ struct fib_table {
 	unsigned char	tb_data[0];
 };
 
+struct fn_zone;
+struct fn_hash
+{
+	struct fn_zone	*fn_zones[33];
+	struct fn_zone	*fn_zone_list;
+};
+
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ip_fib_local_table 	get_exec_env()->_local_table
+#define ip_fib_main_table 	get_exec_env()->_main_table
+#else
 extern struct fib_table *ip_fib_local_table;
 extern struct fib_table *ip_fib_main_table;
+#endif
 
 static inline struct fib_table *fib_get_table(int id)
 {
@@ -173,7 +185,12 @@ static inline void fib_select_default(co
 #define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL])
 #define ip_fib_main_table (fib_tables[RT_TABLE_MAIN])
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define fib_tables get_exec_env()->_fib_tables
+#else
 extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
+#endif
+
 extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
 extern struct fib_table *__fib_new_table(int id);
 extern void fib_rule_put(struct fib_rule *r);
@@ -219,10 +236,19 @@ extern u32  __fib_res_prefsrc(struct fib
 
 /* Exported by fib_hash.c */
 extern struct fib_table *fib_hash_init(int id);
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+struct ve_struct;
+extern int init_ve_route(struct ve_struct *ve);
+extern void fini_ve_route(struct ve_struct *ve);
+#else
+#define init_ve_route(ve)	(0)
+#define fini_ve_route(ve)	do { } while (0)
+#endif
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 /* Exported by fib_rules.c */
-
+extern int fib_rules_create(void);
+extern void fib_rules_destroy(void);
 extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb);
diff -Nurap linux-2.6.9-100.orig/include/net/ndisc.h linux-2.6.9-ve023stab054/include/net/ndisc.h
--- linux-2.6.9-100.orig/include/net/ndisc.h	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/ndisc.h	2011-06-15 19:26:19.000000000 +0400
@@ -37,7 +37,14 @@
 #include <net/neighbour.h>
 #include <asm/atomic.h>
 
-extern struct neigh_table nd_tbl;
+#ifdef CONFIG_VE
+#define nd_tbl		(*(get_exec_env()->ve_nd_tbl))
+extern int ve_ndisc_init(struct ve_struct *ve);
+extern void ve_ndisc_fini(struct ve_struct *ve);
+#else
+extern struct neigh_table global_nd_tbl;
+#define nd_tbl		global_nd_tbl
+#endif
 
 struct nd_msg {
         struct icmp6hdr	icmph;
diff -Nurap linux-2.6.9-100.orig/include/net/neighbour.h linux-2.6.9-ve023stab054/include/net/neighbour.h
--- linux-2.6.9-100.orig/include/net/neighbour.h	2011-06-09 19:23:04.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/neighbour.h	2011-06-15 19:26:19.000000000 +0400
@@ -191,6 +191,8 @@ struct neigh_table
 	atomic_t		entries;
 	rwlock_t		lock;
 	unsigned long		last_rand;
+	struct ve_struct	*owner_env;
+	struct user_beancounter *owner_ub;
 	struct neigh_parms	*parms_list;
 	kmem_cache_t		*kmem_cachep;
 	struct neigh_statistics	*stats;
@@ -211,7 +213,7 @@ struct neigh_table
 #define NEIGH_UPDATE_F_ISROUTER			0x40000000
 #define NEIGH_UPDATE_F_ADMIN			0x80000000
 
-extern void			neigh_table_init(struct neigh_table *tbl);
+extern int			neigh_table_init(struct neigh_table *tbl);
 extern int			neigh_table_clear(struct neigh_table *tbl);
 extern struct neighbour *	neigh_lookup(struct neigh_table *tbl,
 					     const void *pkey,
diff -Nurap linux-2.6.9-100.orig/include/net/scm.h linux-2.6.9-ve023stab054/include/net/scm.h
--- linux-2.6.9-100.orig/include/net/scm.h	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/scm.h	2011-06-15 19:26:19.000000000 +0400
@@ -43,7 +43,7 @@ static __inline__ int scm_send(struct so
 	memset(scm, 0, sizeof(*scm));
 	scm->creds.uid = current->uid;
 	scm->creds.gid = current->gid;
-	scm->creds.pid = current->tgid;
+	scm->creds.pid = virt_tgid(current);
 	if (msg->msg_controllen <= 0)
 		return 0;
 	return __scm_send(sock, msg, scm);
diff -Nurap linux-2.6.9-100.orig/include/net/sock.h linux-2.6.9-ve023stab054/include/net/sock.h
--- linux-2.6.9-100.orig/include/net/sock.h	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/sock.h	2011-06-15 19:26:19.000000000 +0400
@@ -55,6 +55,8 @@
 #include <net/dst.h>
 #include <net/checksum.h>
 
+#include <ub/ub_net.h>
+
 /*
  * This structure really needs to be cleaned up.
  * Most of it is for TCP, and not used by any of
@@ -266,8 +268,12 @@ struct sock {
 						  struct sk_buff *skb);  
 	void			(*sk_create_child)(struct sock *sk, struct sock *newsk);
 	void                    (*sk_destruct)(struct sock *sk);
+	struct sock_beancounter sk_bc;
+	struct ve_struct *sk_owner_env;
 };
 
+DCL_VE_OWNER_PROTO(SK, SLAB, struct sock, sk_owner_env, , (noinline, regparm(1)))
+
 /*
  * Hashed lists helper routines
  */
@@ -488,6 +494,8 @@ do {	if (!(__sk)->sk_backlog.tail) {				
 })
 
 extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
+extern int __sk_stream_wait_memory(struct sock *sk, long *timeo_p,
+				unsigned long amount);
 extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
 extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
 extern int sk_stream_error(struct sock *sk, int flags, int err);
@@ -683,8 +691,11 @@ static inline void sk_stream_writequeue_
 
 static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
 {
-	return (int)skb->truesize <= sk->sk_forward_alloc ||
-		sk_stream_mem_schedule(sk, skb->truesize, 1);
+	if ((int)skb->truesize > sk->sk_forward_alloc &&
+		!sk_stream_mem_schedule(sk, skb->truesize, 1))
+		/* The situation is bad according to mainstream. Den */
+		return 0;
+	return ub_tcprcvbuf_charge(sk, skb) == 0;
 }
 
 /* Used by processes to "lock" a socket state, so that
@@ -733,6 +744,11 @@ extern struct sk_buff 		*sock_alloc_send
 						     unsigned long size,
 						     int noblock,
 						     int *errcode);
+extern struct sk_buff 		*sock_alloc_send_skb2(struct sock *sk,
+						      unsigned long size,
+						      unsigned long size2,
+						      int noblock,
+						      int *errcode);
 extern struct sk_buff 		*sock_alloc_send_pskb(struct sock *sk,
 						      unsigned long header_len,
 						      unsigned long data_len,
@@ -1149,6 +1165,10 @@ static inline int sock_queue_rcv_skb(str
 		goto out;
 	}
 
+	err = ub_sockrcvbuf_charge(sk, skb);
+	if (err < 0)
+		goto out;
+
 	/* It would be deadlock, if sock_queue_rcv_skb is used
 	   with socket lock! We assume that users of this
 	   function are lock free.
diff -Nurap linux-2.6.9-100.orig/include/net/tcp.h linux-2.6.9-ve023stab054/include/net/tcp.h
--- linux-2.6.9-100.orig/include/net/tcp.h	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/tcp.h	2011-06-15 19:26:20.000000000 +0400
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/cache.h>
 #include <linux/percpu.h>
+#include <linux/ve_owner.h>
 #include <net/checksum.h>
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -39,6 +40,10 @@
 #endif
 #include <linux/seq_file.h>
 
+
+#define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
+#define TCP_OFF(sk)	(sk->sk_sndmsg_off)
+
 /* This is for all connections with a full identity, no wildcards.
  * New scheme, half the table is for TIME_WAIT, the other half is
  * for the rest.  I'll experiment with dynamic table growth later.
@@ -83,12 +88,16 @@ struct tcp_ehash_bucket {
  * ports are created in O(1) time?  I thought so. ;-)	-DaveM
  */
 struct tcp_bind_bucket {
+	struct ve_struct	*owner_env;
 	unsigned short		port;
 	signed short		fastreuse;
 	struct hlist_node	node;
 	struct hlist_head	owners;
 };
 
+DCL_VE_OWNER_PROTO(TB, GENERIC, struct tcp_bind_bucket, owner_env,
+						inline, (always_inline));
+
 #define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node)
 
 struct tcp_bind_hashbucket {
@@ -155,16 +164,17 @@ extern struct tcp_hashinfo {
 
 extern kmem_cache_t *tcp_bucket_cachep;
 extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
-						 unsigned short snum);
+						 unsigned short snum,
+						 struct ve_struct *env);
 extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb);
 extern void tcp_bucket_unlock(struct sock *sk);
 extern int tcp_port_rover;
 extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif);
 
 /* These are AF independent. */
-static __inline__ int tcp_bhashfn(__u16 lport)
+static __inline__ int tcp_bhashfn(__u16 lport, unsigned veid)
 {
-	return (lport & (tcp_bhash_size - 1));
+	return ((lport + (veid ^ (veid >> 16))) & (tcp_bhash_size - 1));
 }
 
 extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
@@ -214,13 +224,19 @@ struct tcp_tw_bucket {
 	unsigned long		tw_ttd;
 	struct tcp_bind_bucket	*tw_tb;
 	struct hlist_node	tw_death_node;
+	spinlock_t		tw_lock;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct in6_addr		tw_v6_daddr;
 	struct in6_addr		tw_v6_rcv_saddr;
 	int			tw_v6_ipv6only;
 #endif
+	envid_t			tw_owner_env;
 };
 
+#define TW_VEID(tw)	((tw)->tw_owner_env)
+#define SET_TW_VEID(tw, veid)	((tw)->tw_owner_env) = (veid)
+
+
 static __inline__ void tw_add_node(struct tcp_tw_bucket *tw,
 				   struct hlist_head *list)
 {
@@ -301,7 +317,11 @@ static inline int tcp_v6_ipv6only(const 
 # define tcp_v6_ipv6only(__sk)		0
 #endif
 
+#define TW_WSCALE_MASK		0x0f
+#define TW_WSCALE_SPEC		0x10
+
 extern kmem_cache_t *tcp_timewait_cachep;
+#include <ub/ub_net.h>
 
 static inline void tcp_tw_put(struct tcp_tw_bucket *tw)
 {
@@ -337,28 +357,38 @@ extern void tcp_tw_deschedule(struct tcp
 #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
 	__u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
 #endif /* __BIG_ENDIAN */
-#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
+#define TCP_IPV4_MATCH_ALLVE(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie))	&&	\
 	 ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
+#define TCP_IPV4_TW_MATCH_ALLVE(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) &&	\
 	 ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #else /* 32-bit arch */
 #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
-#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
+#define TCP_IPV4_MATCH_ALLVE(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
 	((inet_sk(__sk)->daddr			== (__saddr))	&&	\
 	 (inet_sk(__sk)->rcv_saddr		== (__daddr))	&&	\
 	 ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
+#define TCP_IPV4_TW_MATCH_ALLVE(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
 	((tcptw_sk(__sk)->tw_daddr		== (__saddr))	&&	\
 	 (tcptw_sk(__sk)->tw_rcv_saddr		== (__daddr))	&&	\
 	 ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #endif /* 64-bit arch */
 
+#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif, __ve)\
+        (TCP_IPV4_MATCH_ALLVE((__sk), (__cookie), (__saddr), (__daddr),	    \
+							(__ports), (__dif)) \
+	 && ve_accessible_strict(VE_OWNER_SK((__sk)), (__ve)))
+
+#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif, __ve)\
+        (TCP_IPV4_TW_MATCH_ALLVE((__sk), (__cookie), (__saddr), (__daddr),  \
+							(__ports), (__dif)) \
+	 && ve_accessible_strict(TW_VEID(tcptw_sk(__sk)), VEID(__ve)))
+
 #define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif)	   \
 	(((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports))   	&& \
 	 ((__sk)->sk_family		== AF_INET6)		&& \
@@ -367,16 +397,16 @@ extern void tcp_tw_deschedule(struct tcp
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 
 /* These can have wildcards, don't try too hard. */
-static __inline__ int tcp_lhashfn(unsigned short num)
+static __inline__ int tcp_lhashfn(unsigned short num, unsigned veid)
 {
-	return num & (TCP_LHTABLE_SIZE - 1);
+	return ((num + (veid ^ (veid >> 16))) & (TCP_LHTABLE_SIZE - 1));
 }
 
 static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
 {
-	return tcp_lhashfn(inet_sk(sk)->num);
+	return tcp_lhashfn(inet_sk(sk)->num, VEID(VE_OWNER_SK(sk)));
 }
-
+ 
 #define MAX_TCP_HEADER	(128 + MAX_HEADER)
 
 /* 
@@ -586,6 +616,8 @@ extern int sysctl_tcp_rfc1337;
 extern int sysctl_tcp_abort_on_overflow;
 extern int sysctl_tcp_max_orphans;
 extern int sysctl_tcp_max_tw_buckets;
+extern int sysctl_tcp_max_tw_kmem_fraction;
+extern int sysctl_tcp_max_tw_buckets_ve;
 extern int sysctl_tcp_fack;
 extern int sysctl_tcp_reordering;
 extern int sysctl_tcp_ecn;
@@ -594,7 +626,9 @@ extern int sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
 extern int sysctl_tcp_rmem[3];
 extern int sysctl_tcp_app_win;
+#ifndef sysctl_tcp_adv_win_scale
 extern int sysctl_tcp_adv_win_scale;
+#endif
 extern int sysctl_tcp_tw_reuse;
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_low_latency;
@@ -612,6 +646,7 @@ extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
+extern int sysctl_tcp_use_sg;
 
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
@@ -764,12 +799,17 @@ static inline int between(__u32 seq1, __
 extern struct proto tcp_prot;
 
 DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
-#define TCP_INC_STATS(field)		SNMP_INC_STATS(tcp_statistics, field)
-#define TCP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(tcp_statistics, field)
-#define TCP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(tcp_statistics, field)
-#define TCP_DEC_STATS(field)		SNMP_DEC_STATS(tcp_statistics, field)
-#define TCP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(tcp_statistics, field, val)
-#define TCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(tcp_statistics, field, val)
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_tcp_statistics (get_exec_env()->_tcp_statistics)
+#else
+#define ve_tcp_statistics tcp_statistics
+#endif
+#define TCP_INC_STATS(field)		SNMP_INC_STATS(ve_tcp_statistics, field)
+#define TCP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_tcp_statistics, field)
+#define TCP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_tcp_statistics, field)
+#define TCP_DEC_STATS(field)		SNMP_DEC_STATS(ve_tcp_statistics, field)
+#define TCP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(ve_tcp_statistics, field, val)
+#define TCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(ve_tcp_statistics, field, val)
 
 extern void			tcp_put_port(struct sock *sk);
 extern void			tcp_inherit_port(struct sock *sk, struct sock *child);
@@ -837,9 +877,9 @@ static __inline__ void tcp_delack_init(s
 	memset(&tp->ack, 0, sizeof(tp->ack));
 }
 
-static inline void tcp_clear_options(struct tcp_opt *tp)
+static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 {
- 	tp->tstamp_ok = tp->sack_ok = tp->wscale_ok = tp->snd_wscale = 0;
+ 	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
 }
 
 enum tcp_tw_status
@@ -888,7 +928,7 @@ extern int			tcp_recvmsg(struct kiocb *i
 extern int			tcp_listen_start(struct sock *sk);
 
 extern void			tcp_parse_options(struct sk_buff *skb,
-						  struct tcp_opt *tp,
+						  struct tcp_options_received *opt_rx,
 						  int estab);
 
 /*
@@ -1071,7 +1111,7 @@ static __inline__ void __tcp_fast_path_o
 
 static __inline__ void tcp_fast_path_on(struct tcp_opt *tp)
 {
-	__tcp_fast_path_on(tp, tp->snd_wnd>>tp->snd_wscale);
+	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
 }
 
 static inline void tcp_fast_path_check(struct sock *sk, struct tcp_opt *tp)
@@ -1108,7 +1148,7 @@ extern u32	__tcp_select_window(struct so
  * only use of the low 32-bits of jiffies and hide the ugly
  * casts with the following macro.
  */
-#define tcp_time_stamp		((__u32)(jiffies))
+#define tcp_time_stamp		((__u32)(jiffies + get_exec_env()->jiffies_fixup))
 
 /* This is what the send packet queueing engine uses to pass
  * TCP per-packet control information to the transmission
@@ -1353,7 +1393,7 @@ static inline __u32 tcp_current_ssthresh
 
 static inline void tcp_sync_left_out(struct tcp_opt *tp)
 {
-	if (tp->sack_ok &&
+	if (tp->rx_opt.sack_ok &&
 	    (tcp_get_pcount(&tp->sacked_out) >=
 	     tcp_get_pcount(&tp->packets_out) - tcp_get_pcount(&tp->lost_out)))
 		tcp_set_pcount(&tp->sacked_out,
@@ -1684,39 +1724,39 @@ static __inline__ void tcp_done(struct s
 		tcp_destroy_sock(sk);
 }
 
-static __inline__ void tcp_sack_reset(struct tcp_opt *tp)
+static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
 {
-	tp->dsack = 0;
-	tp->eff_sacks = 0;
-	tp->num_sacks = 0;
+	rx_opt->dsack = 0;
+	rx_opt->eff_sacks = 0;
+	rx_opt->num_sacks = 0;
 }
 
 static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *tp, __u32 tstamp)
 {
-	if (tp->tstamp_ok) {
+	if (tp->rx_opt.tstamp_ok) {
 		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
 					  (TCPOPT_NOP << 16) |
 					  (TCPOPT_TIMESTAMP << 8) |
 					  TCPOLEN_TIMESTAMP);
 		*ptr++ = htonl(tstamp);
-		*ptr++ = htonl(tp->ts_recent);
+		*ptr++ = htonl(tp->rx_opt.ts_recent);
 	}
-	if (tp->eff_sacks) {
-		struct tcp_sack_block *sp = tp->dsack ? tp->duplicate_sack : tp->selective_acks;
+	if (tp->rx_opt.eff_sacks) {
+		struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
 		int this_sack;
 
 		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
 					  (TCPOPT_NOP << 16) |
 					  (TCPOPT_SACK << 8) |
 					  (TCPOLEN_SACK_BASE +
-					   (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)));
-		for(this_sack = 0; this_sack < tp->eff_sacks; this_sack++) {
+					   (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)));
+		for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
 			*ptr++ = htonl(sp[this_sack].start_seq);
 			*ptr++ = htonl(sp[this_sack].end_seq);
 		}
-		if (tp->dsack) {
-			tp->dsack = 0;
-			tp->eff_sacks--;
+		if (tp->rx_opt.dsack) {
+			tp->rx_opt.dsack = 0;
+			tp->rx_opt.eff_sacks--;
 		}
 	}
 }
@@ -1862,17 +1902,17 @@ static inline void tcp_synq_drop(struct 
 }
 
 static __inline__ void tcp_openreq_init(struct open_request *req,
-					struct tcp_opt *tp,
+					struct tcp_options_received *rx_opt,
 					struct sk_buff *skb)
 {
 	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
 	req->rcv_isn = TCP_SKB_CB(skb)->seq;
-	req->mss = tp->mss_clamp;
-	req->ts_recent = tp->saw_tstamp ? tp->rcv_tsval : 0;
-	req->tstamp_ok = tp->tstamp_ok;
-	req->sack_ok = tp->sack_ok;
-	req->snd_wscale = tp->snd_wscale;
-	req->wscale_ok = tp->wscale_ok;
+	req->mss = rx_opt->mss_clamp;
+	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
+	req->tstamp_ok = rx_opt->tstamp_ok;
+	req->sack_ok = rx_opt->sack_ok;
+	req->snd_wscale = rx_opt->snd_wscale;
+	req->wscale_ok = rx_opt->wscale_ok;
 	req->acked = 0;
 	req->ecn_ok = 0;
 	req->rmt_port = skb->h.th->source;
@@ -1921,11 +1961,11 @@ static inline int tcp_fin_time(struct tc
 	return fin_timeout;
 }
 
-static inline int tcp_paws_check(struct tcp_opt *tp, int rst)
+static inline int tcp_paws_check(struct tcp_options_received *rx_opt, int rst)
 {
-	if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0)
+	if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
 		return 0;
-	if (xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
+	if (xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
 		return 0;
 
 	/* RST segments are not recommended to carry timestamp,
@@ -1940,7 +1980,7 @@ static inline int tcp_paws_check(struct 
 
 	   However, we can relax time bounds for RST segments to MSL.
 	 */
-	if (rst && xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_MSL)
+	if (rst && xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
 		return 0;
 	return 1;
 }
@@ -1952,6 +1992,8 @@ static inline void tcp_v4_setup_caps(str
 		if (sk->sk_no_largesend || dst->header_len)
 			sk->sk_route_caps &= ~NETIF_F_TSO;
 	}
+	if (!sysctl_tcp_use_sg)
+		sk->sk_route_caps &= ~NETIF_F_SG;
 }
 
 #define TCP_CHECK_TIMER(sk) do { } while (0)
diff -Nurap linux-2.6.9-100.orig/include/net/udp.h linux-2.6.9-ve023stab054/include/net/udp.h
--- linux-2.6.9-100.orig/include/net/udp.h	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/include/net/udp.h	2011-06-15 19:26:19.000000000 +0400
@@ -40,13 +40,19 @@ extern rwlock_t udp_hash_lock;
 
 extern int udp_port_rover;
 
-static inline int udp_lport_inuse(u16 num)
+static inline int udp_hashfn(u16 num, unsigned veid)
+{
+	return ((num + (veid ^ (veid >> 16))) & (UDP_HTABLE_SIZE - 1));
+}
+
+static inline int udp_lport_inuse(u16 num, struct ve_struct *env)
 {
 	struct sock *sk;
 	struct hlist_node *node;
 
-	sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
-		if (inet_sk(sk)->num == num)
+	sk_for_each(sk, node, &udp_hash[udp_hashfn(num, VEID(env))])
+		if (inet_sk(sk)->num == num &&
+		    ve_accessible_strict(VE_OWNER_SK(sk), env))
 			return 1;
 	return 0;
 }
@@ -75,9 +81,14 @@ extern unsigned int udp_poll(struct file
 			     poll_table *wait);
 
 DECLARE_SNMP_STAT(struct udp_mib, udp_statistics);
-#define UDP_INC_STATS(field)		SNMP_INC_STATS(udp_statistics, field)
-#define UDP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(udp_statistics, field)
-#define UDP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(udp_statistics, field)
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_udp_statistics (get_exec_env()->_udp_statistics)
+#else
+#define ve_udp_statistics udp_statistics
+#endif
+#define UDP_INC_STATS(field)		SNMP_INC_STATS(ve_udp_statistics, field)
+#define UDP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_udp_statistics, field)
+#define UDP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_udp_statistics, field)
 
 /* /proc */
 struct udp_seq_afinfo {
diff -Nurap linux-2.6.9-100.orig/include/ub/beancounter.h linux-2.6.9-ve023stab054/include/ub/beancounter.h
--- linux-2.6.9-100.orig/include/ub/beancounter.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/beancounter.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,381 @@
+/*
+ *  include/ub/beancounter.h
+ *
+ *  Copyright (C) 1999-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ *  Andrey Savochkin	saw@sw-soft.com
+ *
+ */
+
+#ifndef _LINUX_BEANCOUNTER_H
+#define _LINUX_BEANCOUNTER_H
+
+/*
+ * Generic ratelimiting stuff.
+ */
+
+struct ub_rate_info {
+	int burst;
+	int interval; /* jiffy_t per event */
+	int bucket; /* kind of leaky bucket */
+	unsigned long last; /* last event */
+};
+
+/* Return true if rate limit permits. */
+int ub_ratelimit(struct ub_rate_info *);
+
+#include <ub/ub_decl.h>
+
+/*
+ * This magic is used to distinguish user beancounter and pages beancounter
+ * in struct page. page_ub and page_bc are placed in union and MAGIC
+ * ensures us that we don't use pbc as ubc in ub_page_uncharge().
+ */
+#define UB_MAGIC		0x62756275
+
+/*
+ *	Resource list.
+ */
+
+#define UB_KMEMSIZE	0	/* Unswappable kernel memory size including
+				 * struct task, page directories, etc.
+				 */
+#define UB_LOCKEDPAGES	1	/* Mlock()ed pages. */
+#define UB_PRIVVMPAGES	2	/* Total number of pages, counting potentially
+				 * private pages as private and used.
+				 */
+#define UB_SHMPAGES	3	/* IPC SHM segment size. */
+#define UB_ZSHMPAGES	4	/* Anonymous shared memory. */
+#define UB_NUMPROC	5	/* Number of processes. */
+#define UB_PHYSPAGES	6	/* All resident pages, for swapout guarantee. */
+#define UB_VMGUARPAGES	7	/* Guarantee for memory allocation,
+				 * checked against PRIVVMPAGES.
+				 */
+#define UB_OOMGUARPAGES	8	/* Guarantees against OOM kill.
+				 * Only limit is used, no accounting.
+				 */
+#define UB_NUMTCPSOCK	9	/* Number of TCP sockets. */
+#define UB_NUMFLOCK	10	/* Number of file locks. */
+#define UB_NUMPTY	11	/* Number of PTYs. */
+#define UB_NUMSIGINFO	12	/* Number of siginfos. */
+#define UB_TCPSNDBUF	13	/* Total size of tcp send buffers. */
+#define UB_TCPRCVBUF	14	/* Total size of tcp receive buffers. */
+#define UB_OTHERSOCKBUF	15	/* Total size of other socket
+				 * send buffers (all buffers for PF_UNIX).
+				 */
+#define UB_DGRAMRCVBUF	16	/* Total size of other socket
+				 * receive buffers.
+				 */
+#define UB_NUMOTHERSOCK	17	/* Number of other sockets. */
+#define UB_DCACHESIZE	18	/* Size of busy dentry/inode cache. */
+#define UB_NUMFILE	19	/* Number of open files. */
+
+#define UB_RESOURCES	24
+
+#define UB_UNUSEDPRIVVM	(UB_RESOURCES + 0)
+#define UB_TMPFSPAGES	(UB_RESOURCES + 1)
+#define UB_SWAPPAGES	(UB_RESOURCES + 2)
+#define UB_HELDPAGES	(UB_RESOURCES + 3)
+
+struct ubparm {
+	/* 
+	 * A barrier over which resource allocations are failed gracefully.
+	 * If the amount of consumed memory is over the barrier further sbrk()
+	 * or mmap() calls fail, the existing processes are not killed. 
+	 */
+	unsigned long	barrier;
+	/* hard resource limit */
+	unsigned long	limit;
+	/* consumed resources */
+	unsigned long	held;
+	/* maximum amount of consumed resources through the last period */
+	unsigned long	maxheld;
+	/* minimum amount of consumed resources through the last period */
+	unsigned long	minheld;
+	/* count of failed charges */
+	unsigned long	failcnt;
+};
+
+/*
+ * Kernel internal part.
+ */
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+#include <ub/ub_debug.h>
+#include <linux/interrupt.h>
+#include <asm/atomic.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/threads.h>
+
+/*
+ * UB_MAXVALUE is essentially LONG_MAX declared in a cross-compiling safe form.
+ */
+#define UB_MAXVALUE	( (1UL << (sizeof(unsigned long)*8-1)) - 1)
+
+
+/*
+ *	Resource management structures
+ * Serialization issues:
+ *   beancounter list management is protected via ub_hash_lock
+ *   task pointers are set only for current task and only once
+ *   refcount is managed atomically
+ *   value and limit comparison and change are protected by per-ub spinlock
+ */
+
+struct page_beancounter;
+struct task_beancounter;
+struct sock_beancounter;
+
+struct page_private {
+	unsigned long		ubp_unused_privvmpages;
+	unsigned long		ubp_tmpfs_respages;
+	unsigned long		ubp_swap_pages;
+	unsigned long long	ubp_held_pages;
+};
+
+struct sock_private {
+	unsigned long		ubp_rmem_thres;
+	unsigned long		ubp_wmem_pressure;
+	unsigned long		ubp_maxadvmss;
+	unsigned long		ubp_rmem_pressure;
+#define UB_RMEM_EXPAND          0
+#define UB_RMEM_KEEP            1
+#define UB_RMEM_SHRINK          2
+	struct list_head	ubp_other_socks;
+	struct list_head	ubp_tcp_socks;
+	atomic_t		ubp_orphan_count;
+};
+
+struct ub_perfstat {
+	unsigned long unmap;
+	unsigned long swapin;
+} ____cacheline_aligned_in_smp;
+
+struct user_beancounter
+{
+	unsigned long		ub_magic;
+	atomic_t		ub_refcount;
+	struct user_beancounter	*ub_next;
+	spinlock_t		ub_lock;
+	uid_t			ub_uid;
+
+	struct ub_rate_info	ub_limit_rl;
+	int			ub_oom_noproc;
+
+	struct page_private	ppriv;
+#define ub_unused_privvmpages	ppriv.ubp_unused_privvmpages
+#define ub_tmpfs_respages	ppriv.ubp_tmpfs_respages
+#define ub_swap_pages		ppriv.ubp_swap_pages
+#define ub_held_pages		ppriv.ubp_held_pages
+	struct sock_private	spriv;
+#define ub_rmem_thres		spriv.ubp_rmem_thres
+#define ub_maxadvmss		spriv.ubp_maxadvmss
+#define ub_rmem_pressure	spriv.ubp_rmem_pressure
+#define ub_wmem_pressure	spriv.ubp_wmem_pressure
+#define ub_tcp_sk_list		spriv.ubp_tcp_socks
+#define ub_other_sk_list	spriv.ubp_other_socks
+#define ub_orphan_count		spriv.ubp_orphan_count
+
+	struct user_beancounter *parent;
+	void			*private_data;
+	unsigned long		ub_aflags;
+
+	/* resources statistic and settings */
+	struct ubparm		ub_parms[UB_RESOURCES];
+	/* resources statistic for last interval */
+	struct ubparm		ub_store[UB_RESOURCES];
+
+	struct ub_perfstat	ub_perfstat[NR_CPUS];
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	struct list_head	ub_cclist;
+	long			ub_pages_charged[NR_CPUS];
+	long			ub_vmalloc_charged[NR_CPUS];
+#endif
+	int			ub_tw_count;
+};
+
+enum ub_severity { UB_HARD, UB_SOFT, UB_FORCE };
+
+#define UB_AFLAG_NOTIF_PAGEIN	0
+
+static inline
+struct user_beancounter *top_beancounter(struct user_beancounter *ub)
+{
+	while (ub->parent != NULL)
+		ub = ub->parent;
+	return ub;
+}
+
+static inline int ub_barrier_hit(struct user_beancounter *ub, int resource)
+{
+	return ub->ub_parms[resource].held > ub->ub_parms[resource].barrier;
+}
+
+static inline int ub_hfbarrier_hit(struct user_beancounter *ub, int resource)
+{
+	return (ub->ub_parms[resource].held > 
+		((ub->ub_parms[resource].barrier) >> 1));
+}
+
+static inline int ub_barrier_farnr(struct user_beancounter *ub, int resource)
+{
+	struct ubparm *p;
+	p = ub->ub_parms + resource;
+	return p->held <= (p->barrier >> 3);
+}
+
+static inline int ub_barrier_farsz(struct user_beancounter *ub, int resource)
+{
+	struct ubparm *p;
+	p = ub->ub_parms + resource;
+	return p->held <= (p->barrier >> 3) && p->barrier >= 1024 * 1024;
+}
+
+#ifndef CONFIG_USER_RESOURCE
+
+extern inline struct user_beancounter *get_beancounter_byuid
+		(uid_t uid, int create) { return NULL; }
+extern inline struct user_beancounter *get_beancounter
+		(struct user_beancounter *ub) { return NULL; }
+extern inline void put_beancounter(struct user_beancounter *ub) { }
+
+static inline void page_ubc_init(void) { }
+static inline void beancounter_init(unsigned long mempages) { }
+static inline void ub0_init(void) { }
+
+static inline int charge_beancounter(struct user_beancounter *ub,
+			int resource, unsigned long val,
+			enum ub_severity strict) { return 0; }
+static inline void uncharge_beancounter(struct user_beancounter *ub,
+			int resource, unsigned long val) { }
+
+static inline void beancounter_proc_init(void) { };
+
+#else /* CONFIG_USER_RESOURCE */
+
+/*
+ *  Charge/uncharge operations
+ */
+
+extern int __charge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val, enum ub_severity strict);
+
+extern void __uncharge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val);
+
+extern void __put_beancounter(struct user_beancounter *ub);
+
+extern void uncharge_warn(struct user_beancounter *ub, int resource,
+		unsigned long val, unsigned long held);
+
+extern const char *ub_rnames[];
+/*
+ *	Put a beancounter reference
+ */
+
+static inline void put_beancounter(struct user_beancounter *ub)
+{
+	if (unlikely(ub == NULL))
+		return;
+
+	__put_beancounter(ub);
+}
+
+/* fast put, refcount can't reach zero */
+static inline void __put_beancounter_batch(struct user_beancounter *ub, int n)
+{
+	atomic_sub(n, &ub->ub_refcount);
+}
+
+static inline void put_beancounter_batch(struct user_beancounter *ub, int n)
+{
+	if (n > 1)
+		__put_beancounter_batch(ub, n - 1);
+	__put_beancounter(ub);
+}
+
+/*
+ *	Create a new beancounter reference
+ */
+extern struct user_beancounter *get_beancounter_byuid(uid_t uid, int create);
+
+static inline 
+struct user_beancounter *get_beancounter(struct user_beancounter *ub)
+{
+	if (unlikely(ub == NULL))
+		return NULL;
+
+	atomic_inc(&ub->ub_refcount);
+	return ub;
+}
+
+static inline void get_beancounter_batch(struct user_beancounter *ub, int n)
+{
+	atomic_add(n, &ub->ub_refcount);
+}
+
+extern struct user_beancounter *get_subbeancounter_byid(
+		struct user_beancounter *,
+		int id, int create);
+extern struct user_beancounter *subbeancounter_findcreate(
+		struct user_beancounter *p, int id);
+
+extern void beancounter_init(unsigned long);
+extern void page_ubc_init(void);
+extern struct user_beancounter ub0;
+extern void ub0_init(void);
+#define get_ub0()	(&ub0)
+
+extern void print_ub_uid(struct user_beancounter *ub, char *buf, int size);
+
+/*
+ *	Resource charging
+ * Change user's account and compare against limits
+ */
+
+static inline void ub_adjust_maxheld(struct user_beancounter *ub, int resource)
+{
+	if (ub->ub_parms[resource].maxheld < ub->ub_parms[resource].held)
+		ub->ub_parms[resource].maxheld = ub->ub_parms[resource].held;
+	if (ub->ub_parms[resource].minheld > ub->ub_parms[resource].held)
+		ub->ub_parms[resource].minheld = ub->ub_parms[resource].held;
+}
+
+int charge_beancounter(struct user_beancounter *ub,
+			int resource, unsigned long val,
+			enum ub_severity strict);
+void uncharge_beancounter(struct user_beancounter *ub,
+			int resource, unsigned long val);
+void __charge_beancounter_notop(struct user_beancounter *ub,
+			int resource, unsigned long val);
+void __uncharge_beancounter_notop(struct user_beancounter *ub,
+			int resource, unsigned long val);
+
+static inline void charge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	if (ub->parent != NULL)
+		__charge_beancounter_notop(ub, resource, val);
+}
+
+static inline void uncharge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	if (ub->parent != NULL)
+		__uncharge_beancounter_notop(ub, resource, val);
+}
+
+extern void beancounter_proc_init(void);
+
+#endif /* CONFIG_USER_RESOURCE */
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_BEANCOUNTER_H */
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_dcache.h linux-2.6.9-ve023stab054/include/ub/ub_dcache.h
--- linux-2.6.9-100.orig/include/ub/ub_dcache.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_dcache.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,48 @@
+/*
+ *  include/ub/ub_dcache.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DCACHE_H_
+#define __UB_DCACHE_H_
+
+#include <ub/ub_decl.h>
+
+/*
+ * UB_DCACHESIZE accounting
+ */
+
+struct dentry_beancounter
+{
+	/*
+	 *  d_inuse =
+	 *         <number of external refs> +
+	 *         <number of 'used' children>
+	 *
+	 * d_inuse == -1 means that dentry is unused
+	 * state change -1 => 0 causes charge
+	 * state change 0 => -1 causes uncharge
+	 */
+	atomic_t d_inuse;
+	/* charged size, including name length if name is not inline */
+	unsigned long d_ubsize;
+	struct user_beancounter *d_ub;
+};
+
+#ifdef CONFIG_USER_RESOURCE
+
+extern int ub_dentry_on;
+extern void ub_dentry_checkup(void);
+
+#else
+
+static inline void ub_dentry_checkup(void) { }
+
+#endif
+
+#endif /* __UB_DCACHE_H_ */
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_dcache_op.h linux-2.6.9-ve023stab054/include/ub/ub_dcache_op.h
--- linux-2.6.9-100.orig/include/ub/ub_dcache_op.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_dcache_op.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,105 @@
+/*
+ *  include/ub/ub_dcache_op.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DCACHE_OP_H_
+#define __UB_DCACHE_OP_H_
+
+struct dentry;
+
+#ifdef CONFIG_USER_RESOURCE
+
+#include <linux/spinlock.h>
+#include <ub/ub_dcache.h>
+#include <ub/ub_task.h>
+
+extern int ub_dentry_alloc_barrier;
+extern spinlock_t dcache_lock;
+
+static inline int ub_dentry_alloc(struct dentry *d)
+{
+	extern int __ub_dentry_alloc(struct dentry *);
+
+	if (!ub_dentry_on)
+		return 0;
+	return __ub_dentry_alloc(d);
+}
+
+static inline void ub_dentry_free(struct dentry *d)
+{
+}
+
+static inline void ub_dentry_alloc_start(void)
+{
+	extern void __ub_dentry_alloc_start(void);
+
+	if (ub_dentry_alloc_barrier)
+		__ub_dentry_alloc_start();
+}
+
+static inline void ub_dentry_alloc_end(void)
+{
+	extern void __ub_dentry_alloc_end(void);
+
+	if (task_bc(current)->dentry_alloc)
+		__ub_dentry_alloc_end();
+}
+
+static inline int ub_dentry_charge(struct dentry *d)
+{
+	extern int __ub_dentry_charge(struct dentry *);
+
+	if (!ub_dentry_on)
+		return 0;
+	return __ub_dentry_charge(d);
+}
+
+static inline void ub_dentry_charge_nofail(struct dentry *d)
+{
+	extern void __ub_dentry_charge_nofail(struct dentry *);
+
+	if (!ub_dentry_on)
+		return;
+	__ub_dentry_charge_nofail(d);
+}
+
+static inline void ub_dentry_uncharge_locked(struct dentry *d)
+{
+	extern void __ub_dentry_uncharge(struct dentry *);
+
+	if (!ub_dentry_on)
+		return;
+	__ub_dentry_uncharge(d);
+}
+
+static inline void ub_dentry_uncharge(struct dentry *d)
+{
+	extern void __ub_dentry_uncharge(struct dentry *);
+
+	if (!ub_dentry_on)
+		return;
+	spin_lock(&dcache_lock);
+	__ub_dentry_uncharge(d);
+	spin_unlock(&dcache_lock);
+}
+
+#else /* CONFIG_USER_RESOURCE */
+
+static inline int ub_dentry_alloc(struct dentry *d) { return 0; }
+static inline void ub_dentry_free(struct dentry *d) { }
+static inline void ub_dentry_alloc_start(void) { }
+static inline void ub_dentry_alloc_end(void) { }
+static inline int ub_dentry_charge(struct dentry *d) { return 0; }
+static inline void ub_dentry_charge_nofail(struct dentry *d) { }
+static inline void ub_dentry_uncharge_locked(struct dentry *d) { }
+static inline void ub_dentry_uncharge(struct dentry *d) { }
+
+#endif /* CONFIG_USER_RESOURCE */
+
+#endif /* __UB_DCACHE_OP_H_ */
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_debug.h linux-2.6.9-ve023stab054/include/ub/ub_debug.h
--- linux-2.6.9-100.orig/include/ub/ub_debug.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_debug.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,95 @@
+/*
+ *  include/ub/ub_debug.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DEBUG_H_
+#define __UB_DEBUG_H_
+
+/*
+ * general debugging
+ */
+
+#define UBD_ALLOC	0x1
+#define UBD_CHARGE	0x2
+#define UBD_LIMIT	0x4
+#define UBD_TRACE	0x8
+
+/*
+ * ub_net debugging
+ */
+
+#define UBD_NET_SOCKET	0x10
+#define UBD_NET_SLEEP	0x20
+#define UBD_NET_SEND	0x40
+#define UBD_NET_RECV	0x80
+
+/*
+ * Main routines
+ */
+
+#define UB_DEBUG (0)
+#define DEBUG_RESOURCE (0ULL)
+
+#define ub_dbg_cond(__cond, __str, args...)				\
+	do { 								\
+		if ((__cond) != 0)					\
+			printk(__str, ##args);				\
+	} while(0)
+
+#define ub_debug(__section, __str, args...) 				\
+	ub_dbg_cond(UB_DEBUG & (__section), __str, ##args)
+
+#define ub_debug_resource(__resource, __str, args...)			\
+	ub_dbg_cond((UB_DEBUG & UBD_CHARGE) && 				\
+			(DEBUG_RESOURCE & (1 << (__resource))), 	\
+			__str, ##args)
+
+#if UB_DEBUG & UBD_TRACE
+#define ub_debug_trace(__cond, __b, __r)				\
+		do {							\
+			static struct ub_rate_info ri =	{ __b, __r };	\
+			if ((__cond) != 0 && ub_ratelimit(&ri))		\
+				dump_stack(); 				\
+		} while(0)
+#else
+#define ub_debug_trace(__cond, __burst, __rate)
+#endif
+
+#include <linux/config.h>
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+#include <linux/list.h>
+#include <linux/kmem_cache.h>
+
+struct user_beancounter;
+struct ub_cache_counter {
+	struct list_head ulist;
+	struct ub_cache_counter *next;
+	struct user_beancounter *ub;
+	kmem_cache_t *cachep;
+	unsigned long counter;
+};
+
+extern spinlock_t cc_lock;
+extern void init_cache_counters(void);
+extern void ub_free_counters(struct user_beancounter *);
+extern void ub_kmemcache_free(kmem_cache_t *cachep);
+
+struct vm_struct;
+extern void inc_vmalloc_charged(struct vm_struct *, int);
+extern void dec_vmalloc_charged(struct vm_struct *);
+#else
+#define init_cache_counters()		do { } while (0)
+#define inc_vmalloc_charged(vm, f)	do { } while (0)
+#define dec_vmalloc_charged(vm)		do { } while (0)
+#define ub_free_counters(ub)		do { } while (0)
+#define ub_kmemcache_free(cachep)	do { } while (0)
+#endif
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_decl.h linux-2.6.9-ve023stab054/include/ub/ub_decl.h
--- linux-2.6.9-100.orig/include/ub/ub_decl.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_decl.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,42 @@
+/*
+ *  include/ub/ub_decl.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DECL_H_
+#define __UB_DECL_H_
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+
+/*
+ * Naming convention:
+ * ub_<section|object>_<operation>
+ */
+
+#ifdef CONFIG_USER_RESOURCE
+
+#define UB_DECLARE_FUNC(ret_type, decl)	extern ret_type decl;
+#define UB_DECLARE_VOID_FUNC(decl)	extern void decl;
+
+#else /* CONFIG_USER_RESOURCE */
+
+#define UB_DECLARE_FUNC(ret_type, decl)		\
+	static inline ret_type decl		\
+	{					\
+		return (ret_type)0;		\
+	}
+#define UB_DECLARE_VOID_FUNC(decl)		\
+	static inline void decl			\
+	{					\
+	}
+
+#endif /* CONFIG_USER_RESOURCE */
+#endif
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_hash.h linux-2.6.9-ve023stab054/include/ub/ub_hash.h
--- linux-2.6.9-100.orig/include/ub/ub_hash.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_hash.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,41 @@
+/*
+ *  include/ub/ub_hash.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_UBHASH_H
+#define _LINUX_UBHASH_H
+
+#ifdef __KERNEL__
+
+#define UB_HASH_SIZE 256
+
+struct ub_hash_slot {
+	struct user_beancounter *ubh_beans;
+};
+
+extern struct ub_hash_slot ub_hash[];
+extern spinlock_t ub_hash_lock;
+
+#ifdef CONFIG_USER_RESOURCE
+
+/*
+ * Iterate over beancounters
+ * @__slot  - hash slot
+ * @__ubp - beancounter ptr
+ * Can use break :)
+ */
+#define for_each_beancounter(__slot, __ubp)				\
+	for (__slot = 0, __ubp = NULL; 					\
+		__slot < UB_HASH_SIZE && __ubp == NULL; __slot++)	\
+		 for (__ubp = ub_hash[__slot].ubh_beans; __ubp;		\
+				 __ubp = __ubp->ub_next)
+
+#endif /* CONFIG_USER_RESOURCE */
+#endif /* __KERNEL__ */
+#endif /* _LINUX_UBHASH_H */
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_mem.h linux-2.6.9-ve023stab054/include/ub/ub_mem.h
--- linux-2.6.9-100.orig/include/ub/ub_mem.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_mem.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,87 @@
+/*
+ *  include/ub/ub_mem.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_SLAB_H_
+#define __UB_SLAB_H_
+
+#include <linux/config.h>
+#include <linux/kmem_slab.h>
+#include <ub/beancounter.h>
+#include <ub/ub_decl.h>
+
+/*
+ * UB_KMEMSIZE accounting
+ * oom_killer related
+ */
+
+/*
+ * Memory freeing statistics to make correct OOM decision
+ */
+
+struct oom_freeing_stat
+{
+	unsigned long oom_generation; /* current OOM gen */
+	unsigned long freed;
+	unsigned long swapped; /* page reference counters removed */
+	unsigned long written; /* IO started */
+	unsigned long slabs;   /* slabs shrunk */
+};
+
+extern int oom_generation;
+extern int oom_kill_counter;
+extern spinlock_t oom_generation_lock;
+
+#ifdef CONFIG_UBC_DEBUG_ITEMS 
+#define CHARGE_ORDER(__o)		(1 << (__o))
+#define CHARGE_SIZE(__s)		1
+#else
+#define CHARGE_ORDER(__o)		(PAGE_SIZE << (__o))
+#define CHARGE_SIZE(__s)		(__s)
+#endif
+
+#define page_ub(__page)	((__page)->bc.page_ub)
+
+struct mm_struct;
+struct page;
+
+UB_DECLARE_FUNC(struct user_beancounter *, slab_ub(void *obj))
+UB_DECLARE_FUNC(struct user_beancounter *, vmalloc_ub(void *obj))
+UB_DECLARE_FUNC(struct user_beancounter *, mem_ub(void *obj))
+
+UB_DECLARE_FUNC(int, ub_page_charge(struct page *page, int order, int mask))
+UB_DECLARE_VOID_FUNC(ub_page_uncharge(struct page *page, int order))
+
+UB_DECLARE_VOID_FUNC(ub_clear_oom(void))
+UB_DECLARE_VOID_FUNC(ub_oomkill_task(struct mm_struct *mm, 
+			struct user_beancounter *ub, long overdraft))
+UB_DECLARE_FUNC(int, ub_slab_charge(void *objp, int flags))
+UB_DECLARE_VOID_FUNC(ub_slab_uncharge(void *obj))
+
+#ifdef CONFIG_USER_RESOURCE
+/* Flags without __GFP_UBC must comply with vmalloc */
+#define ub_vmalloc(size) __vmalloc(size, \
+		GFP_KERNEL | __GFP_HIGHMEM | __GFP_UBC, PAGE_KERNEL)
+#define ub_kmalloc(size, flags) kmalloc(size, ((flags) | __GFP_UBC))
+extern struct user_beancounter *ub_select_worst(long *);
+#else
+#define ub_vmalloc(size) vmalloc(size)
+#define ub_kmalloc(size, flags) kmalloc(size, flags)
+static inline struct user_beancounter *ub_select_worst(long *over)
+{
+	*over = 0;
+	return NULL;
+}
+#endif
+
+#define slab_ubcs(cachep, slabp) ((struct user_beancounter **)\
+		(ALIGN((unsigned long)(slab_bufctl(slabp) + (cachep)->num),\
+		       sizeof(void *))))
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_misc.h linux-2.6.9-ve023stab054/include/ub/ub_misc.h
--- linux-2.6.9-100.orig/include/ub/ub_misc.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_misc.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,33 @@
+/*
+ *  include/ub/ub_misc.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_MISC_H_
+#define __UB_MISC_H_
+
+#include <ub/ub_decl.h>
+
+struct tty_struct;
+struct file;
+struct file_lock;
+
+UB_DECLARE_FUNC(int, ub_file_charge(struct file *f))
+UB_DECLARE_VOID_FUNC(ub_file_uncharge(struct file *f))
+UB_DECLARE_FUNC(int, ub_flock_charge(struct file_lock *fl, int hard))
+UB_DECLARE_VOID_FUNC(ub_flock_uncharge(struct file_lock *fl))
+UB_DECLARE_FUNC(int, ub_siginfo_charge(struct user_beancounter *ub,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_siginfo_uncharge(struct user_beancounter *ub,
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_task_charge(struct task_struct *parent,
+			struct task_struct *task))
+UB_DECLARE_VOID_FUNC(ub_task_uncharge(struct task_struct *task))
+UB_DECLARE_FUNC(int, ub_pty_charge(struct tty_struct *tty))
+UB_DECLARE_VOID_FUNC(ub_pty_uncharge(struct tty_struct *tty))
+#endif
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_net.h linux-2.6.9-ve023stab054/include/ub/ub_net.h
--- linux-2.6.9-100.orig/include/ub/ub_net.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_net.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,215 @@
+/*
+ *  include/ub/ub_net.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_NET_H_
+#define __UB_NET_H_
+
+/*
+ * UB_NUMXXXSOCK, UB_XXXBUF accounting
+ */
+
+#include <ub/ub_decl.h>
+#include <ub/ub_sk.h>
+#include <ub/beancounter.h>
+
+#define bid2sid(__bufid) \
+	((__bufid) == UB_TCPSNDBUF ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK)
+
+#define SOCK_MIN_UBCSPACE ((int)((2048 - sizeof(struct skb_shared_info)) & \
+			~(SMP_CACHE_BYTES-1)))
+#define SOCK_MIN_UBCSPACE_CH skb_charge_size(SOCK_MIN_UBCSPACE)
+
+static inline int ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask)
+{
+#ifdef CONFIG_USER_RESOURCE
+	memset(skb_bc(skb), 0, sizeof(struct skb_beancounter));
+#endif
+	return 0;
+}
+
+static inline void ub_skb_free_bc(struct sk_buff *skb)
+{
+}
+
+#define IS_TCP_SOCK(__family, __type) \
+		((__family) == PF_INET && (__type) == SOCK_STREAM)
+
+/* number of sockets */
+UB_DECLARE_FUNC(int, ub_sock_charge(struct sock *sk, int family, int type))
+UB_DECLARE_FUNC(int, ub_tcp_sock_charge(struct sock *sk)) 
+UB_DECLARE_FUNC(int, ub_other_sock_charge(struct sock *sk))
+UB_DECLARE_VOID_FUNC(ub_sock_uncharge(struct sock *sk))
+
+/* management of queue for send space */
+UB_DECLARE_FUNC(long, ub_sock_wait_for_space(struct sock *sk, long timeo, 
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_sock_snd_queue_add(struct sock *sk, int resource, 
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_sock_sndqueuedel(struct sock *sk))
+
+/* send space */
+UB_DECLARE_FUNC(int, ub_sock_make_wreserv(struct sock *sk, int bufid,
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_sock_get_wreserv(struct sock *sk, int bufid,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_sock_ret_wreserv(struct sock *sk, int bufid,
+			unsigned long size, unsigned long ressize))
+UB_DECLARE_FUNC(int, ub_sock_tcp_chargesend(struct sock *sk,
+			struct sk_buff *skb, enum ub_severity strict))
+UB_DECLARE_VOID_FUNC(ub_sock_tcp_unchargesend(struct sock *sk,
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_sock_tcp_chargepage(struct sock *sk))
+UB_DECLARE_VOID_FUNC(ub_sock_tcp_detachpage(struct sock *sk))
+
+UB_DECLARE_FUNC(int, ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk))
+
+/* receive space */
+UB_DECLARE_FUNC(int, ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb))
+UB_DECLARE_FUNC(int, ub_sock_tcp_chargerecv(struct sock *sk,
+			struct sk_buff *skb, enum ub_severity strict))
+
+/* skb destructor */
+UB_DECLARE_VOID_FUNC(ub_skb_uncharge(struct sk_buff *skb))
+
+static inline int ub_sock_makewres_other(struct sock *sk, unsigned long size)
+{
+	return ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size);
+}
+
+static inline int ub_sock_makewres_tcp(struct sock *sk, unsigned long size)
+{
+	return ub_sock_make_wreserv(sk, UB_TCPSNDBUF, size);
+}
+
+UB_DECLARE_FUNC(int, ub_sock_getwres_other(struct sock *sk,
+			unsigned long size))
+
+static inline int ub_sock_getwres_tcp(struct sock *sk, unsigned long size)
+{
+	return ub_sock_get_wreserv(sk, UB_TCPSNDBUF, size);
+}
+
+UB_DECLARE_VOID_FUNC(ub_sock_retwres_other(struct sock *sk,
+			unsigned long size, unsigned long ressize))
+
+static inline void ub_sock_retwres_tcp(struct sock *sk, unsigned long size,
+		unsigned long ressize)
+{
+	ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, size, ressize);
+}
+
+static inline int ub_sock_sndqueueadd_other(struct sock *sk, unsigned long sz)
+{
+	return ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, sz);
+}
+
+static inline int ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz)
+{
+	return ub_sock_snd_queue_add(sk, UB_TCPSNDBUF, sz);
+}
+
+static inline int ub_tcpsndbuf_charge(struct sock *sk,
+		struct sk_buff *skb)
+{
+	return ub_sock_tcp_chargesend(sk, skb, UB_HARD);
+}
+
+static inline int ub_tcpsndbuf_charge_forced(struct sock *sk,
+		struct sk_buff *skb)
+{
+	return ub_sock_tcp_chargesend(sk, skb, UB_FORCE);
+}
+
+static inline int ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb)
+{
+	return ub_sock_tcp_chargerecv(sk, skb, UB_SOFT);
+}
+
+static inline int ub_tcprcvbuf_charge_forced(struct sock *sk,
+		struct sk_buff *skb)
+{
+	return ub_sock_tcp_chargerecv(sk, skb, UB_FORCE);
+}
+
+/* Charge size */
+static inline unsigned long skb_charge_datalen(unsigned long chargesize)
+{
+#ifdef CONFIG_USER_RESOURCE
+	unsigned long slabsize;
+
+	chargesize -= sizeof(struct sk_buff);
+	slabsize = 64;
+	do { 
+		slabsize <<= 1; 
+	} while (slabsize <= chargesize);
+
+	slabsize >>= 1;
+	return (slabsize - sizeof(struct skb_shared_info)) &
+		~(SMP_CACHE_BYTES-1);
+#else
+	return 0;
+#endif
+}
+
+static inline unsigned long skb_charge_size_gen(unsigned long size)
+{ 
+#ifdef CONFIG_USER_RESOURCE
+	unsigned int slabsize;
+
+	size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info);
+	slabsize = 32; /* min size is 64 because of skb_shared_info */
+	do { 
+		slabsize <<= 1; 
+	} while (slabsize < size);
+
+	return slabsize + sizeof(struct sk_buff);
+#else
+	return 0;
+#endif
+
+}
+	
+static inline unsigned long skb_charge_size_const(unsigned long size)
+{
+#ifdef CONFIG_USER_RESOURCE
+	unsigned int ret;
+	if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 64)
+		ret = 64 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 128)
+		ret = 128 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 256)
+		ret = 256 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 512)
+		ret = 512 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 1024)
+		ret = 1024 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 2048)
+		ret = 2048 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 4096)
+		ret = 4096 + sizeof(struct sk_buff);
+	else
+		ret = skb_charge_size_gen(size);
+	return ret;
+#else
+	return 0;
+#endif
+}
+
+
+#define skb_charge_size(__size)			\
+	(__builtin_constant_p(__size)	?	\
+	 skb_charge_size_const(__size)	:	\
+	 skb_charge_size_gen(__size))
+
+UB_DECLARE_FUNC(int, skb_charge_fullsize(struct sk_buff *skb))
+UB_DECLARE_VOID_FUNC(ub_skb_set_charge(struct sk_buff *skb, 
+			struct sock *sk, unsigned long size, int res))
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_orphan.h linux-2.6.9-ve023stab054/include/ub/ub_orphan.h
--- linux-2.6.9-100.orig/include/ub/ub_orphan.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_orphan.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,103 @@
+/*
+ *  include/ub/ub_orphan.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_ORPHAN_H_
+#define __UB_ORPHAN_H_
+
+#include "ub/beancounter.h"
+#include "ub/ub_net.h"
+
+
+extern int ub_too_many_orphans(struct sock *sk, int count);
+static inline int tcp_too_many_orphans(struct sock *sk, int count)
+{
+#ifdef CONFIG_USER_RESOURCE
+	if (ub_too_many_orphans(sk, count))
+		return 1;
+#endif
+	return (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
+		(sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+		 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]));
+}
+
+static inline atomic_t *tcp_get_orphan_count_ptr(struct sock *sk)
+{
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk))
+		return &sock_bc(sk)->ub->ub_orphan_count;
+#endif
+	return &tcp_orphan_count;
+}
+
+static inline void tcp_inc_orphan_count(struct sock *sk)
+{
+	atomic_inc(tcp_get_orphan_count_ptr(sk));
+}
+
+static inline void tcp_dec_orphan_count(struct sock *sk)
+{
+	atomic_dec(tcp_get_orphan_count_ptr(sk));
+}
+
+static inline int tcp_get_orphan_count(struct sock *sk)
+{
+	return atomic_read(tcp_get_orphan_count_ptr(sk));
+}
+
+
+/* ub->ub_tw_count is serialized via tw_death_lock; that's why the logic is
+   so strange and the call sites are very interesting. Den */
+#ifdef CONFIG_USER_RESOURCE
+
+#include <ub/ub_mem.h>
+#include <linux/kmem_cache.h>
+
+struct tcp_tw_bucket;
+static inline void ub_inc_tw_count(struct tcp_tw_bucket *tw)
+{
+	struct user_beancounter *ub;
+	ub = slab_ub(tw);
+	if (ub == NULL)
+		return;
+	ub->ub_tw_count++;
+}
+
+static inline void ub_dec_tw_count(struct tcp_tw_bucket *tw)
+{
+	struct user_beancounter *ub;
+	ub = slab_ub(tw);
+	if (ub == NULL)
+		return;
+	ub->ub_tw_count--;
+}
+
+static inline int ub_check_tw_count(struct sock *sk)
+{
+	unsigned long mem_max, mem;
+	struct user_beancounter *ub;
+
+	ub = sock_bc(sk)->ub;
+	if (ub == NULL)
+		return 1;
+	mem_max = sysctl_tcp_max_tw_kmem_fraction *
+	       ((ub->ub_parms[UB_KMEMSIZE].limit >> 10) + 1);
+	mem = ub->ub_tw_count * kmem_cache_memusage(tcp_timewait_cachep);
+	return ub->ub_tw_count < sysctl_tcp_max_tw_buckets_ve && mem < mem_max;
+}
+
+#else /* CONFIG_USER_RESOURCE */
+
+#define ub_inc_tw_count(tw)	do { } while (0)
+#define ub_dec_tw_count(tw)	do { } while (0)
+#define ub_check_tw_count(sk)	(1)
+
+#endif
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_page.h linux-2.6.9-ve023stab054/include/ub/ub_page.h
--- linux-2.6.9-100.orig/include/ub/ub_page.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_page.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,48 @@
+/*
+ *  include/ub/ub_page.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_PAGE_H_
+#define __UB_PAGE_H_
+
+#include <linux/config.h>
+
+/*
+ * Page_beancounters
+ */
+
+struct page;
+struct user_beancounter;
+
+#define PB_MAGIC 0x62700001UL
+
+struct page_beancounter {
+	unsigned long pb_magic;
+	struct page *page;
+	struct user_beancounter *ub;
+	struct page_beancounter *next_hash;
+	unsigned refcount;
+	struct list_head page_list;
+};
+
+#define PB_REFCOUNT_BITS 24
+#define PB_SHIFT_GET(c) ((c) >> PB_REFCOUNT_BITS)
+#define PB_SHIFT_INC(c) ((c) += (1 << PB_REFCOUNT_BITS))
+#define PB_SHIFT_DEC(c) ((c) -= (1 << PB_REFCOUNT_BITS))
+#define PB_COUNT_GET(c) ((c) & ((1 << PB_REFCOUNT_BITS) - 1))
+#define PB_COUNT_INC(c) ((c)++)
+#define PB_COUNT_DEC(c) ((c)--)
+#define PB_REFCOUNT_MAKE(s, c) (((s) << PB_REFCOUNT_BITS) + (c))
+
+#define page_pbc(__page)	((__page)->bc.page_pbc)
+
+struct address_space;
+extern int is_shmem_mapping(struct address_space *);
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_sk.h linux-2.6.9-ve023stab054/include/ub/ub_sk.h
--- linux-2.6.9-100.orig/include/ub/ub_sk.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_sk.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,50 @@
+/*
+ *  include/ub/ub_sk.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_SK_H_
+#define __UB_SK_H_
+
+#include <linux/config.h>
+#include <ub/ub_task.h>
+
+struct sock;
+struct sk_buff;
+
+struct skb_beancounter {
+	struct user_beancounter *ub;
+	unsigned long charged:27, resource:5;
+};
+
+struct sock_beancounter {
+	struct user_beancounter *ub;
+	/*
+	 * poll_reserv accounts space already charged for future sends.
+	 * It is required to make poll agree with sendmsg.
+	 * Additionally, it makes real charges (with taking bc spinlock)
+	 * in the send path rarer, speeding networking up.
+	 * For TCP (only): changes are protected by socket lock (not bc!)
+	 * For all proto: may be read without serialization in poll.
+	 */
+	unsigned long           poll_reserv;
+	unsigned long		forw_space;
+	/* fields below are protected by bc spinlock */
+	unsigned long           ub_waitspc;     /* space waiting for */
+	unsigned long           ub_wcharged;
+	struct list_head        ub_sock_list;
+};
+
+#define sock_bc(__sk)		(&(__sk)->sk_bc)
+#define skb_bc(__skb)		(&(__skb)->skb_bc)
+#define skbc_sock(__skbc)	(container_of(__skbc, struct sock, sk_bc))
+#define sock_has_ubc(__sk)	(sock_bc(__sk)->ub != NULL)
+
+#define	set_sk_exec_ub(__sk)	(set_exec_ub(sock_bc(sk)->ub))
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_stat.h linux-2.6.9-ve023stab054/include/ub/ub_stat.h
--- linux-2.6.9-100.orig/include/ub/ub_stat.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_stat.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,70 @@
+/*
+ *  include/ub/ub_stat.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_STAT_H_
+#define __UB_STAT_H_
+
+/* sys_ubstat commands list */
+#define UBSTAT_READ_ONE			0x010000
+#define UBSTAT_READ_ALL			0x020000
+#define UBSTAT_READ_FULL		0x030000
+#define UBSTAT_UBLIST			0x040000
+#define UBSTAT_UBPARMNUM		0x050000
+#define UBSTAT_GETTIME			0x060000
+
+#define UBSTAT_CMD(func)		((func) & 0xF0000)
+#define UBSTAT_PARMID(func)		((func) & 0x0FFFF)
+
+#define TIME_MAX_SEC		(LONG_MAX / HZ)
+#define TIME_MAX_JIF		(TIME_MAX_SEC * HZ)
+
+typedef unsigned long ubstattime_t;
+
+typedef struct {
+	ubstattime_t	start_time;
+	ubstattime_t	end_time;
+	ubstattime_t	cur_time;
+} ubgettime_t;
+
+typedef struct {
+	long		maxinterval;
+	int		signum;
+} ubnotifrq_t;
+
+typedef struct {
+	unsigned long	maxheld;
+	unsigned long	failcnt;
+} ubstatparm_t;
+
+typedef struct {
+	unsigned long	barrier;
+	unsigned long	limit;
+	unsigned long	held;
+	unsigned long	maxheld;
+	unsigned long	minheld;
+	unsigned long	failcnt;
+	unsigned long __unused1;
+	unsigned long __unused2;
+} ubstatparmf_t;
+
+typedef struct {
+	ubstattime_t	start_time;
+	ubstattime_t	end_time;
+	ubstatparmf_t	param[0];
+} ubstatfull_t;
+
+#ifdef __KERNEL__
+struct ub_stat_notify {
+	struct list_head	list;
+	struct task_struct	*task;
+	int			signum;
+};
+#endif
+#endif
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_task.h linux-2.6.9-ve023stab054/include/ub/ub_task.h
--- linux-2.6.9-100.orig/include/ub/ub_task.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_task.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,56 @@
+/*
+ *  include/ub/ub_task.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_TASK_H_
+#define __UB_TASK_H_
+
+#include <linux/config.h>
+
+struct user_beancounter;
+
+
+#ifdef CONFIG_USER_RESOURCE
+
+struct task_beancounter {
+	struct user_beancounter	*exec_ub;
+	struct user_beancounter	*task_ub;
+	struct user_beancounter *fork_sub;
+	unsigned long file_precharged, file_quant, file_count;
+	unsigned long kmem_precharged;
+	char dentry_alloc, pgfault_handle;
+	void *task_fnode, *task_freserv;
+	unsigned long task_data[4];
+	unsigned long pgfault_allot;
+};
+
+#define task_bc(__tsk) (&((__tsk)->task_bc))
+
+#define get_exec_ub()		(task_bc(current)->exec_ub)
+#define get_task_ub(__task)	(task_bc(__task)->task_ub)
+#define set_exec_ub(__newub)		\
+({					\
+	struct user_beancounter *old;	\
+	struct task_beancounter *tbc;	\
+	tbc = task_bc(current);		\
+	old = tbc->exec_ub;		\
+	tbc->exec_ub = __newub;		\
+	old;				\
+})
+
+void ub_init_task_bc(struct task_beancounter *);
+
+#else /* CONFIG_USER_RESOURCE */
+
+#define get_exec_ub()		(NULL)
+#define get_task_ub(task)	(NULL)
+#define set_exec_ub(__ub)	(NULL)
+
+#endif /* CONFIG_USER_RESOURCE */
+#endif /* __UB_TASK_H_ */
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_tcp.h linux-2.6.9-ve023stab054/include/ub/ub_tcp.h
--- linux-2.6.9-100.orig/include/ub/ub_tcp.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_tcp.h	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,76 @@
+/*
+ *  include/ub/ub_tcp.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_TCP_H_
+#define __UB_TCP_H_
+
+/*
+ * UB_NUMXXXSOCK, UB_XXXBUF accounting
+ */
+
+#include <ub/ub_sk.h>
+#include <ub/beancounter.h>
+
+static inline void ub_tcp_update_maxadvmss(struct sock *sk)
+{
+#ifdef CONFIG_USER_RESOURCE
+	if (!sock_has_ubc(sk))
+		return;
+	if (sock_bc(sk)->ub->ub_maxadvmss >= tcp_sk(sk)->advmss)
+		return;
+
+	sock_bc(sk)->ub->ub_maxadvmss =
+		skb_charge_size(MAX_HEADER + sizeof(struct iphdr)
+				+ sizeof(struct tcphdr)	+ tcp_sk(sk)->advmss);
+#endif
+}
+
+static inline int ub_tcp_rmem_allows_expand(struct sock *sk)
+{
+	if (tcp_memory_pressure)
+		return 0;
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk)) {
+		struct user_beancounter *ub;
+
+		ub = sock_bc(sk)->ub;
+		if (ub->ub_rmem_pressure == UB_RMEM_EXPAND)
+			return 1;
+		if (ub->ub_rmem_pressure == UB_RMEM_SHRINK)
+			return 0;
+		return sk->sk_rcvbuf <= ub->ub_rmem_thres;
+	}
+#endif
+	return 1;
+}
+
+static inline int ub_tcp_memory_pressure(struct sock *sk)
+{
+	if (tcp_memory_pressure)
+		return 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk))
+		return sock_bc(sk)->ub->ub_rmem_pressure != UB_RMEM_EXPAND;
+#endif
+	return 0;
+}
+
+static inline int ub_tcp_shrink_rcvbuf(struct sock *sk)
+{
+	if (tcp_memory_pressure)
+		return 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk))
+		return sock_bc(sk)->ub->ub_rmem_pressure == UB_RMEM_SHRINK;
+#endif
+	return 0;
+}
+
+#endif
diff -Nurap linux-2.6.9-100.orig/include/ub/ub_vmpages.h linux-2.6.9-ve023stab054/include/ub/ub_vmpages.h
--- linux-2.6.9-100.orig/include/ub/ub_vmpages.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/include/ub/ub_vmpages.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,122 @@
+/*
+ *  include/ub/ub_vmpages.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_PAGES_H_
+#define __UB_PAGES_H_
+
+#include <linux/linkage.h>
+#include <linux/config.h>
+#include <ub/beancounter.h>
+#include <ub/ub_decl.h>
+
+/*
+ * UB_XXXPAGES
+ */
+
+/*
+ * Check whether vma has private or copy-on-write mapping.
+ * Should match checks in ub_protected_charge().
+ */
+#define VM_UB_PRIVATE(__flags, __file)					\
+		( ((__flags) & VM_WRITE) ?				\
+			(__file) == NULL || !((__flags) & VM_SHARED) :	\
+			0						\
+		)
+
+#define UB_PAGE_WEIGHT_SHIFT 24
+#define UB_PAGE_WEIGHT (1 << UB_PAGE_WEIGHT_SHIFT)
+
+struct page_beancounter;
+
+/* Mprotect charging result */
+#define PRIVVM_ERROR		-1
+#define PRIVVM_NO_CHARGE	0
+#define PRIVVM_TO_PRIVATE	1
+#define PRIVVM_TO_SHARED	2
+
+#ifdef CONFIG_USER_RESOURCE
+extern int ub_protected_charge(struct user_beancounter *ub, unsigned long size,
+		unsigned long newflags,  struct vm_area_struct *vma);
+#else
+static inline int ub_protected_charge(struct user_beancounter *ub,
+		unsigned long size, unsigned long flags,
+		struct vm_area_struct *vma)
+{
+	return PRIVVM_NO_CHARGE;
+}
+#endif
+
+UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_inc(struct user_beancounter *ub,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_dec(struct user_beancounter *ub,
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_shmpages_charge(struct user_beancounter *ub,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_shmpages_uncharge(struct user_beancounter *ub,
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_locked_mem_charge(struct user_beancounter *ub, long sz))
+UB_DECLARE_VOID_FUNC(ub_locked_mem_uncharge(struct user_beancounter *ub,
+			long size))
+UB_DECLARE_FUNC(int, ub_privvm_charge(struct user_beancounter *ub,
+			unsigned long flags, struct file *file,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_privvm_uncharge(struct user_beancounter *ub,
+			unsigned long flags, struct file *file,
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_unused_privvm_inc(struct user_beancounter * ub,
+			long size, struct vm_area_struct *vma))
+UB_DECLARE_VOID_FUNC(ub_unused_privvm_dec(struct user_beancounter *ub, long sz,
+			struct vm_area_struct *vma))
+UB_DECLARE_VOID_FUNC(__ub_unused_privvm_dec(struct user_beancounter *ub, long sz))
+UB_DECLARE_FUNC(int, ub_memory_charge(struct user_beancounter * ub,
+			unsigned long size, unsigned vm_flags,
+			struct file *vm_file, int strict))
+UB_DECLARE_VOID_FUNC(ub_memory_uncharge(struct user_beancounter * ub,
+			unsigned long size, unsigned vm_flags,
+			struct file *vm_file))
+UB_DECLARE_FUNC(unsigned long, pages_in_vma_range(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end))
+#define pages_in_vma(vma) \
+		(pages_in_vma_range((vma), (vma)->vm_start, (vma)->vm_end))
+
+extern void fastcall __ub_update_physpages(struct user_beancounter *ub);
+extern void fastcall __ub_update_oomguarpages(struct user_beancounter *ub);
+extern void fastcall __ub_update_privvm(struct user_beancounter *ub);
+
+#ifdef CONFIG_USER_SWAP_ACCOUNTING
+extern void ub_swapentry_inc(struct user_beancounter *ub);
+extern void ub_swapentry_dec(struct user_beancounter *ub);
+#endif
+
+#ifdef CONFIG_USER_RSS_ACCOUNTING
+#define PB_DECLARE_FUNC(ret, decl)	UB_DECLARE_FUNC(ret, decl)
+#define PB_DECLARE_VOID_FUNC(decl)	UB_DECLARE_VOID_FUNC(decl)
+#else
+#define PB_DECLARE_FUNC(ret, decl)	static inline ret decl {return (ret)0;}
+#define PB_DECLARE_VOID_FUNC(decl)	static inline void decl { }
+#endif
+
+PB_DECLARE_FUNC(int, pb_reserve_all(struct page_beancounter **pbc))
+PB_DECLARE_FUNC(int, pb_alloc(struct page_beancounter **pbc))
+PB_DECLARE_FUNC(int, pb_alloc_list(struct page_beancounter **pbc, int num,
+			struct mm_struct *mm))
+PB_DECLARE_FUNC(int, pb_add_ref(struct page *page, struct user_beancounter *ub,
+			struct page_beancounter **pbc))
+PB_DECLARE_VOID_FUNC(pb_free_list(struct page_beancounter **pb))
+PB_DECLARE_VOID_FUNC(pb_free(struct page_beancounter **pb))
+PB_DECLARE_VOID_FUNC(pb_add_list_ref(struct page *page,
+			struct user_beancounter *src_ub,
+			struct user_beancounter *ub,
+			struct page_beancounter **pbc))
+PB_DECLARE_VOID_FUNC(pb_remove_ref(struct page *page,
+			struct user_beancounter *ub))
+PB_DECLARE_FUNC(struct user_beancounter *, pb_grab_page_ub(struct page *page))
+
+#endif
diff -Nurap linux-2.6.9-100.orig/init/Kconfig linux-2.6.9-ve023stab054/init/Kconfig
--- linux-2.6.9-100.orig/init/Kconfig	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/init/Kconfig	2011-06-15 19:26:22.000000000 +0400
@@ -223,6 +223,25 @@ config HOTPLUG
 	  agent" (/sbin/hotplug) to load modules and set up software needed
 	  to use devices as you hotplug them.
 
+config KOBJECT_UEVENT
+	bool "Kernel Userspace Events"
+	depends on NET
+	default y
+	help
+	  This option enables the kernel userspace event layer, which is a
+	  simple mechanism for kernel-to-user communication over a netlink
+	  socket.
+	  The goal of the kernel userspace events layer is to provide a simple
+	  and efficient events system, that notifies userspace about kobject
+	  state changes. This will enable applications to just listen for
+	  events instead of polling system devices and files.
+	  Hotplug events (kobject addition and removal) are also available on
+	  the netlink socket in addition to the execution of /sbin/hotplug if
+	  CONFIG_HOTPLUG is enabled.
+
+	  Say Y, unless you are building a system requiring minimal memory
+	  consumption.
+
 config IKCONFIG
 	bool "Kernel .config support"
 	---help---
diff -Nurap linux-2.6.9-100.orig/init/do_mounts.c linux-2.6.9-ve023stab054/init/do_mounts.c
--- linux-2.6.9-100.orig/init/do_mounts.c	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/init/do_mounts.c	2011-06-15 19:26:18.000000000 +0400
@@ -142,7 +142,7 @@ dev_t __init name_to_dev_t(char *name)
 	int part;
 
 #ifdef CONFIG_SYSFS
-	sys_mkdir("/sys", 0700);
+	int mkdir_err = sys_mkdir("/sys", 0700);
 	if (sys_mount("sysfs", "/sys", "sysfs", 0, NULL) < 0)
 		goto out;
 #endif
@@ -197,7 +197,8 @@ done:
 #ifdef CONFIG_SYSFS
 	sys_umount("/sys", 0);
 out:
-	sys_rmdir("/sys");
+	if (!mkdir_err)
+		sys_rmdir("/sys");
 #endif
 	return res;
 fail:
diff -Nurap linux-2.6.9-100.orig/init/do_mounts_initrd.c linux-2.6.9-ve023stab054/init/do_mounts_initrd.c
--- linux-2.6.9-100.orig/init/do_mounts_initrd.c	2004-10-19 01:53:11.000000000 +0400
+++ linux-2.6.9-ve023stab054/init/do_mounts_initrd.c	2011-06-15 19:26:19.000000000 +0400
@@ -10,7 +10,7 @@
 
 #include "do_mounts.h"
 
-unsigned long initrd_start, initrd_end;
+unsigned long initrd_start, initrd_end, initrd_copy;
 int initrd_below_start_ok;
 unsigned int real_root_dev;	/* do_proc_dointvec cannot handle kdev_t */
 static int __initdata old_fd, root_fd;
diff -Nurap linux-2.6.9-100.orig/init/main.c linux-2.6.9-ve023stab054/init/main.c
--- linux-2.6.9-100.orig/init/main.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/init/main.c	2011-06-15 19:26:19.000000000 +0400
@@ -51,6 +51,8 @@
 #include <asm/setup.h>
 #include <asm/timex.h>
 
+#include <ub/beancounter.h>
+
 /*
  * This is one of the first .c files built. Error out early
  * if we have compiler trouble..
@@ -87,6 +89,7 @@ extern void sbus_init(void);
 extern void sysctl_init(void);
 extern void signals_init(void);
 extern void buffer_init(void);
+extern void fairsched_init_late(void);
 extern void pidhash_init(void);
 extern void pidmap_init(void);
 extern void prio_tree_init(void);
@@ -108,6 +111,16 @@ extern void tc_init(void);
 enum system_states system_state;
 EXPORT_SYMBOL(system_state);
 
+#ifdef CONFIG_VE
+extern void init_ve_system(void);
+#endif
+
+void prepare_ve0_process(struct task_struct *tsk);
+void prepare_ve0_proc_root(void);
+void prepare_ve0_sysctl(void);
+void prepare_ve0_loopback(void);
+void prepare_virtual_fs(void);
+
 /*
  * Boot command-line arguments
  */
@@ -200,6 +213,52 @@ unsigned long loops_per_jiffy = (1<<12);
 
 EXPORT_SYMBOL(loops_per_jiffy);
 
+unsigned long cycles_per_jiffy, cycles_per_clock;
+
+void calibrate_cycles(void)
+{
+	unsigned long ticks;
+	cycles_t time;
+
+	ticks = jiffies;
+	while (ticks == jiffies)
+		/* nothing */;
+	time = get_cycles();
+	ticks = jiffies;
+	while (ticks == jiffies)
+		/* nothing */;
+
+	time = get_cycles() - time;
+	cycles_per_jiffy = time;
+	if ((time >> 32) != 0) {
+		printk("CPU too fast! timings are incorrect\n");
+		cycles_per_jiffy = -1;
+	}
+}
+
+EXPORT_SYMBOL(cycles_per_jiffy);
+
+void calc_cycles_per_jiffy(void)
+{
+#if defined(__i386__)
+	extern unsigned long fast_gettimeoffset_quotient;
+	unsigned long low, high;
+
+	if (fast_gettimeoffset_quotient != 0) {
+		__asm__("divl %2"
+			:"=a" (low), "=d" (high)
+			:"r" (fast_gettimeoffset_quotient),
+			"0" (0), "1" (1000000/HZ));
+
+		cycles_per_jiffy = low;
+	}
+#endif
+	if (cycles_per_jiffy == 0)
+		calibrate_cycles();
+
+	cycles_per_clock = cycles_per_jiffy * (HZ / CLOCKS_PER_SEC);
+}
+
 #ifdef ARCH_HAS_READ_CURRENT_TIMER
 
 /* This routine uses the read_current_timer() routine and gets the
@@ -356,6 +415,7 @@ void __devinit calibrate_delay(void)
 			loops_per_jiffy);
 	}
 
+	calc_cycles_per_jiffy();
 }
 
 static int __init debug_kernel(char *str)
@@ -584,6 +644,7 @@ asmlinkage void __init start_kernel(void
  * enable them
  */
 	lock_kernel();
+	ub0_init();
 	page_address_init();
 	printk(linux_banner);
 	setup_arch(&command_line);
@@ -595,6 +656,8 @@ asmlinkage void __init start_kernel(void
 	 */
 	smp_prepare_boot_cpu();
 
+	prepare_ve0_process(&init_task);
+
 	/*
 	 * Set up the scheduler prior starting any interrupts (such as the
 	 * timer interrupt). Full topology setup happens at smp_init()
@@ -652,6 +715,7 @@ asmlinkage void __init start_kernel(void
 #endif
 	fork_init(num_physpages);
 	proc_caches_init();
+	beancounter_init(num_physpages);
 	buffer_init();
 	unnamed_dev_init();
 	key_init();
@@ -662,13 +726,17 @@ asmlinkage void __init start_kernel(void
 	/* rootfs populating might need page-writeback */
 	page_writeback_init();
 #ifdef CONFIG_PROC_FS
+	prepare_ve0_proc_root();
+	prepare_ve0_sysctl();
 	proc_root_init();
+	beancounter_proc_init();
 #endif
 	check_bugs();
 
 	acpi_early_init(); /* before LAPIC and SMP init */
 
 	/* Do the rest non-__init'ed, we're now alive */
+	page_ubc_init();
 	rest_init();
 }
 
@@ -730,6 +798,9 @@ static void __init do_initcalls(void)
  */
 static void __init do_basic_setup(void)
 {
+	prepare_ve0_loopback();
+	init_ve_system();
+
 	/* drivers will send hotplug events */
 	init_workqueues();
 	usermodehelper_init();
@@ -749,7 +820,7 @@ static void __init do_basic_setup(void)
 static void do_pre_smp_initcalls(void)
 {
 	extern int spawn_ksoftirqd(void);
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
 	extern int migration_init(void);
 
 	migration_init();
@@ -802,6 +873,12 @@ static int init(void * unused)
 	fixup_cpu_present_map();
 	smp_init();
 
+	/*
+	 * This should be done after all cpus are known to
+	 * be online.  smp_init gives us confidence in it.
+	 */
+	fairsched_init_late();
+
 	/*
 	 * Do this before initcalls, because some drivers want to access
 	 * firmware files.
diff -Nurap linux-2.6.9-100.orig/init/version.c linux-2.6.9-ve023stab054/init/version.c
--- linux-2.6.9-100.orig/init/version.c	2004-10-19 01:54:08.000000000 +0400
+++ linux-2.6.9-ve023stab054/init/version.c	2011-06-15 19:26:19.000000000 +0400
@@ -28,6 +28,12 @@ struct new_utsname system_utsname = {
 
 EXPORT_SYMBOL(system_utsname);
 
+struct new_utsname virt_utsname = {
+	/* we need only this field */
+	.release        = UTS_RELEASE,
+};
+EXPORT_SYMBOL(virt_utsname);
+
 const char *linux_banner = 
 	"Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 	LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
diff -Nurap linux-2.6.9-100.orig/ipc/compat.c linux-2.6.9-ve023stab054/ipc/compat.c
--- linux-2.6.9-100.orig/ipc/compat.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/ipc/compat.c	2011-06-15 19:26:19.000000000 +0400
@@ -33,6 +33,8 @@
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
 
+#include <linux/ve_owner.h>
+
 #include "util.h"
 
 struct compat_msgbuf {
diff -Nurap linux-2.6.9-100.orig/ipc/mqueue.c linux-2.6.9-ve023stab054/ipc/mqueue.c
--- linux-2.6.9-100.orig/ipc/mqueue.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/ipc/mqueue.c	2011-06-15 19:26:19.000000000 +0400
@@ -635,7 +635,8 @@ static int oflag2acc[O_ACCMODE] = { MAY_
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) {
+	if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE],
+				NULL, NULL)) {
 		dput(dentry);
 		mntput(mqueue_mnt);
 		return ERR_PTR(-EACCES);
@@ -1015,7 +1016,7 @@ retry:
 				goto out;
 			}
 
-			ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT);
+			ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT, NULL);
 			if (ret == 1)
 		       		goto retry;
 			if (ret) {
diff -Nurap linux-2.6.9-100.orig/ipc/msg.c linux-2.6.9-ve023stab054/ipc/msg.c
--- linux-2.6.9-100.orig/ipc/msg.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/ipc/msg.c	2011-06-15 19:26:22.000000000 +0400
@@ -28,6 +28,7 @@
 #include <asm/current.h>
 #include <asm/uaccess.h>
 #include "util.h"
+#include <linux/module.h>
 
 /* sysctl: */
 int msg_ctlmax = MSGMAX;
@@ -71,11 +72,23 @@ static struct ipc_ids msg_ids;
 	ipc_buildid(&msg_ids, id, seq)
 
 static void freeque (struct msg_queue *msq, int id);
-static int newque (key_t key, int msgflg);
+static int newque (key_t key, int msqid, int msgflg);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
 #endif
 
+void prepare_msg(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->_msg_ids = &msg_ids;
+	get_ve0()->_msg_ctlmax = msg_ctlmax;
+	get_ve0()->_msg_ctlmnb = msg_ctlmnb;
+	get_ve0()->_msg_ctlmni = msg_ctlmni;
+	get_ve0()->_msg_bytes = msg_bytes;
+	get_ve0()->_msg_hdrs = msg_hdrs;
+#endif
+}
+
 void __init msg_init (void)
 {
 	ipc_init_ids(&msg_ids,msg_ctlmni);
@@ -85,7 +98,28 @@ void __init msg_init (void)
 #endif
 }
 
-static int newque (key_t key, int msgflg)
+#ifdef CONFIG_VE
+#  define msg_ids (*(get_exec_env()->_msg_ids))
+#  define msg_ctlmax (get_exec_env()->_msg_ctlmax)
+#  define msg_ctlmnb (get_exec_env()->_msg_ctlmnb)
+#  define msg_ctlmni (get_exec_env()->_msg_ctlmni)
+#  define msg_bytes  (get_exec_env()->_msg_bytes)
+#  define msg_hdrs   (get_exec_env()->_msg_hdrs)
+#endif
+
+#ifdef CONFIG_VE
+void ve_msg_ipc_init (void)
+{
+	msg_ctlmax = MSGMAX;
+	msg_ctlmnb = MSGMNB;
+	msg_ctlmni = MSGMNI;
+	atomic_set(&msg_bytes, 0);
+	atomic_set(&msg_hdrs, 0);
+	ve_ipc_init_ids(&msg_ids, MSGMNI);
+}
+#endif
+
+static int newque (key_t key, int msqid, int msgflg)
 {
 	int id;
 	int retval;
@@ -105,7 +139,7 @@ static int newque (key_t key, int msgflg
 		return retval;
 	}
 
-	id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni);
+	id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni, msqid);
 	if(id == -1) {
 		security_msg_queue_free(msq);
 		ipc_rcu_putref(msq);
@@ -206,12 +240,12 @@ asmlinkage long sys_msgget (key_t key, i
 	
 	down(&msg_ids.sem);
 	if (key == IPC_PRIVATE) 
-		ret = newque(key, msgflg);
+		ret = newque(key, -1, msgflg);
 	else if ((id = ipc_findkey(&msg_ids, key)) == -1) { /* key not used */
 		if (!(msgflg & IPC_CREAT))
 			ret = -ENOENT;
 		else
-			ret = newque(key, msgflg);
+			ret = newque(key, -1, msgflg);
 	} else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
 		ret = -EEXIST;
 	} else {
@@ -446,7 +480,7 @@ asmlinkage long sys_msgctl (int msqid, i
 	ipcp = &msq->q_perm;
 	err = -EPERM;
 	if (current->euid != ipcp->cuid && 
-	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
+	    current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN))
 	    /* We _could_ check for CAP_CHOWN above, but we don't */
 		goto out_unlock_up;
 
@@ -536,7 +570,7 @@ static inline int pipelined_send(struct 
 				msr->r_msg = ERR_PTR(-E2BIG);
 			} else {
 				msr->r_msg = NULL;
-				msq->q_lrpid = msr->r_tsk->pid;
+				msq->q_lrpid = virt_pid(msr->r_tsk);
 				msq->q_rtime = get_seconds();
 				wake_up_process(msr->r_tsk);
 				smp_mb();
@@ -616,7 +650,7 @@ long do_msgsnd(int msqid, long mtype, vo
 		}
 	}
 
-	msq->q_lspid = current->tgid;
+	msq->q_lspid = virt_tgid(current);
 	msq->q_stime = get_seconds();
 
 	if(!pipelined_send(msq,msg)) {
@@ -722,7 +756,7 @@ long do_msgrcv(int msqid, long *pmtype, 
 			list_del(&msg->m_list);
 			msq->q_qnum--;
 			msq->q_rtime = get_seconds();
-			msq->q_lrpid = current->tgid;
+			msq->q_lrpid = virt_tgid(current);
 			msq->q_cbytes -= msg->m_ts;
 			atomic_sub(msg->m_ts,&msg_bytes);
 			atomic_dec(&msg_hdrs);
@@ -882,3 +916,60 @@ done:
 	return len;
 }
 #endif
+
+#ifdef CONFIG_VE
+void ve_msg_ipc_cleanup(void)
+{
+	int i;
+	struct msg_queue *msq;
+
+	down(&msg_ids.sem);
+	for (i = 0; i <= msg_ids.max_id; i++) {
+		msq = msg_lock(i);
+		if (msq == NULL)
+			continue;
+		freeque(msq, i);
+	}
+	up(&msg_ids.sem);
+}
+
+int sysvipc_setup_msg(key_t key, int msqid, int msgflg)
+{
+	int err = 0;
+	struct msg_queue *msq;
+
+	down(&msg_ids.sem);
+	msq = msg_lock(msqid);
+	if (!msq) {
+		err = newque(key, msqid, msgflg);
+		if (err >= 0)
+			msq = msg_lock(msqid);
+	}
+	if (msq)
+		msg_unlock(msq);
+	up(&msg_ids.sem);
+
+	return err > 0 ? 0 : err;
+}
+EXPORT_SYMBOL_GPL(sysvipc_setup_msg);
+
+int sysvipc_walk_msg(int (*func)(int i, struct msg_queue*, void *), void *arg)
+{
+	int i;
+	int err = 0;
+	struct msg_queue * msq;
+
+	down(&msg_ids.sem);
+	for(i = 0; i <= msg_ids.max_id; i++) {
+		if ((msq = msg_lock(i)) == NULL)
+			continue;
+		err = func(msg_buildid(i,msq->q_perm.seq), msq, arg);
+		msg_unlock(msq);
+		if (err)
+			break;
+	}
+	up(&msg_ids.sem);
+	return err;
+}
+EXPORT_SYMBOL(sysvipc_walk_msg);
+#endif
diff -Nurap linux-2.6.9-100.orig/ipc/msgutil.c linux-2.6.9-ve023stab054/ipc/msgutil.c
--- linux-2.6.9-100.orig/ipc/msgutil.c	2004-10-19 01:54:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/ipc/msgutil.c	2011-06-15 19:26:21.000000000 +0400
@@ -8,6 +8,7 @@
  * See the file COPYING for more details.
  */
 
+#include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
 #include <linux/security.h>
@@ -17,6 +18,8 @@
 
 #include "util.h"
 
+#include <ub/ub_mem.h>
+
 struct msg_msgseg {
 	struct msg_msgseg* next;
 	/* the next part of the message follows immediately */
@@ -25,38 +28,40 @@ struct msg_msgseg {
 #define DATALEN_MSG	(PAGE_SIZE-sizeof(struct msg_msg))
 #define DATALEN_SEG	(PAGE_SIZE-sizeof(struct msg_msgseg))
 
-struct msg_msg *load_msg(const void __user *src, int len)
+struct msg_msg *sysv_msg_load(int (*load)(void * dst, int len, int offset,
+					  void * data), int len, void * data)
 {
 	struct msg_msg *msg;
 	struct msg_msgseg **pseg;
 	int err;
 	int alen;
+	int offset = 0;
 
 	alen = len;
 	if (alen > DATALEN_MSG)
 		alen = DATALEN_MSG;
 
-	msg = (struct msg_msg *)kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
+	msg = (struct msg_msg *)ub_kmalloc(sizeof(*msg) + alen,	GFP_KERNEL);
 	if (msg == NULL)
 		return ERR_PTR(-ENOMEM);
 
 	msg->next = NULL;
 	msg->security = NULL;
 
-	if (copy_from_user(msg + 1, src, alen)) {
+	if (load(msg + 1, alen, offset, data)) {
 		err = -EFAULT;
 		goto out_err;
 	}
 
 	len -= alen;
-	src = ((char __user *)src) + alen;
+	offset += alen;
 	pseg = &msg->next;
 	while (len > 0) {
 		struct msg_msgseg *seg;
 		alen = len;
 		if (alen > DATALEN_SEG)
 			alen = DATALEN_SEG;
-		seg = (struct msg_msgseg *)kmalloc(sizeof(*seg) + alen,
+		seg = (struct msg_msgseg *)ub_kmalloc(sizeof(*seg) + alen,
 						 GFP_KERNEL);
 		if (seg == NULL) {
 			err = -ENOMEM;
@@ -64,13 +69,13 @@ struct msg_msg *load_msg(const void __us
 		}
 		*pseg = seg;
 		seg->next = NULL;
-		if (copy_from_user(seg + 1, src, alen)) {
+		if (load(seg + 1, alen, offset, data)) {
 			err = -EFAULT;
 			goto out_err;
 		}
 		pseg = &seg->next;
 		len -= alen;
-		src = ((char __user *)src) + alen;
+		offset += alen;
 	}
 
 	err = security_msg_msg_alloc(msg);
@@ -83,33 +88,58 @@ out_err:
 	free_msg(msg);
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL_GPL(sysv_msg_load);
 
-int store_msg(void __user *dest, struct msg_msg *msg, int len)
+static int do_load_msg(void * dst, int len, int offset, void * data)
+{
+	return copy_from_user(dst, data + offset, len);
+}
+
+struct msg_msg *load_msg(const void __user *src, int len)
+{
+	return sysv_msg_load(do_load_msg, len, (void*)src);
+}
+
+int sysv_msg_store(struct msg_msg *msg,
+		   int (*store)(void * src, int len, int offset, void * data),
+		   int len, void * data)
 {
 	int alen;
+	int offset = 0;
 	struct msg_msgseg *seg;
 
 	alen = len;
 	if (alen > DATALEN_MSG)
 		alen = DATALEN_MSG;
-	if (copy_to_user(dest, msg + 1, alen))
+	if (store(msg + 1, alen, offset, data))
 		return -1;
 
 	len -= alen;
-	dest = ((char __user *)dest) + alen;
+	offset += alen;
 	seg = msg->next;
 	while (len > 0) {
 		alen = len;
 		if (alen > DATALEN_SEG)
 			alen = DATALEN_SEG;
-		if (copy_to_user(dest, seg + 1, alen))
+		if (store(seg + 1, alen, offset, data))
 			return -1;
 		len -= alen;
-		dest = ((char __user *)dest) + alen;
+		offset += alen;
 		seg = seg->next;
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(sysv_msg_store);
+
+static int do_store_msg(void * src, int len, int offset, void * data)
+{
+	return copy_to_user(data + offset, src, len);
+}
+
+int store_msg(void __user *dest, struct msg_msg *msg, int len)
+{
+	return sysv_msg_store(msg, do_store_msg, len, dest);
+}
 
 void free_msg(struct msg_msg *msg)
 {
diff -Nurap linux-2.6.9-100.orig/ipc/sem.c linux-2.6.9-ve023stab054/ipc/sem.c
--- linux-2.6.9-100.orig/ipc/sem.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/ipc/sem.c	2011-06-15 19:26:21.000000000 +0400
@@ -73,8 +73,10 @@
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <asm/uaccess.h>
+#include <linux/module.h>
 #include "util.h"
 
+#include <ub/ub_mem.h>
 
 #define sem_lock(id)	((struct sem_array*)ipc_lock(&sem_ids,id))
 #define sem_unlock(sma)	ipc_unlock(&(sma)->sem_perm)
@@ -83,9 +85,13 @@
 	ipc_checkid(&sem_ids,&sma->sem_perm,semid)
 #define sem_buildid(id, seq) \
 	ipc_buildid(&sem_ids, id, seq)
+
+int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI};
+
 static struct ipc_ids sem_ids;
+static int used_sems;
 
-static int newary (key_t, int, int);
+static int newary (key_t, int, int, int);
 static void freeary (struct sem_array *sma, int id);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
@@ -103,24 +109,51 @@ static int sysvipc_sem_read_proc(char *b
  *	
  */
 
-int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI};
 #define sc_semmsl	(sem_ctls[0])
 #define sc_semmns	(sem_ctls[1])
 #define sc_semopm	(sem_ctls[2])
 #define sc_semmni	(sem_ctls[3])
 
-static int used_sems;
+void prepare_sem(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->_sem_ids = &sem_ids;
+	get_ve0()->_used_sems = used_sems;
+	get_ve0()->_sem_ctls[0] = sem_ctls[0];
+	get_ve0()->_sem_ctls[1] = sem_ctls[1];
+	get_ve0()->_sem_ctls[2] = sem_ctls[2];
+	get_ve0()->_sem_ctls[3] = sem_ctls[3];
+#endif
+}
 
 void __init sem_init (void)
 {
 	used_sems = 0;
-	ipc_init_ids(&sem_ids,sc_semmni);
+	ipc_init_ids(&sem_ids, SEMMNI);
 
 #ifdef CONFIG_PROC_FS
 	create_proc_read_entry("sysvipc/sem", 0, NULL, sysvipc_sem_read_proc, NULL);
 #endif
 }
 
+#ifdef CONFIG_VE
+#  define sem_ids	(*(get_exec_env()->_sem_ids))
+#  define used_sems	(get_exec_env()->_used_sems)
+#  define sem_ctls	(get_exec_env()->_sem_ctls)
+#endif
+
+#ifdef CONFIG_VE
+void ve_sem_ipc_init (void)
+{
+	used_sems = 0;
+	sem_ctls[0] = SEMMSL;
+	sem_ctls[1] = SEMMNS;
+	sem_ctls[2] = SEMOPM;
+	sem_ctls[3] = SEMMNI;
+	ve_ipc_init_ids(&sem_ids, SEMMNI);
+}
+#endif
+
 /*
  * Lockless wakeup algorithm:
  * Without the check/retry algorithm a lockless wakeup is possible:
@@ -155,7 +188,7 @@ void __init sem_init (void)
  */
 #define IN_WAKEUP	1
 
-static int newary (key_t key, int nsems, int semflg)
+static int newary (key_t key, int semid, int nsems, int semflg)
 {
 	int id;
 	int retval;
@@ -184,7 +217,7 @@ static int newary (key_t key, int nsems,
 		return retval;
 	}
 
-	id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
+	id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni, semid);
 	if(id == -1) {
 		security_sem_free(sma);
 		ipc_rcu_putref(sma);
@@ -213,12 +246,12 @@ asmlinkage long sys_semget (key_t key, i
 	down(&sem_ids.sem);
 	
 	if (key == IPC_PRIVATE) {
-		err = newary(key, nsems, semflg);
+		err = newary(key, -1, nsems, semflg);
 	} else if ((id = ipc_findkey(&sem_ids, key)) == -1) {  /* key not used */
 		if (!(semflg & IPC_CREAT))
 			err = -ENOENT;
 		else
-			err = newary(key, nsems, semflg);
+			err = newary(key, -1, nsems, semflg);
 	} else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
 		err = -EEXIST;
 	} else {
@@ -739,7 +772,7 @@ static int semctl_main(int semid, int se
 		for (un = sma->undo; un; un = un->id_next)
 			un->semadj[semnum] = 0;
 		curr->semval = val;
-		curr->sempid = current->tgid;
+		curr->sempid = virt_tgid(current);
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
@@ -819,7 +852,7 @@ static int semctl_down(int semid, int se
 	ipcp = &sma->sem_perm;
 	
 	if (current->euid != ipcp->cuid && 
-	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
+	    current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN)) {
 	    	err=-EPERM;
 		goto out_unlock;
 	}
@@ -940,7 +973,8 @@ static inline int get_undo_list(struct s
 	undo_list = current->sysvsem.undo_list;
 	if (!undo_list) {
 		size = sizeof(struct sem_undo_list);
-		undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
+		undo_list = (struct sem_undo_list *) ub_kmalloc(size,
+				GFP_KERNEL);
 		if (undo_list == NULL)
 			return -ENOMEM;
 		memset(undo_list, 0, size);
@@ -1006,7 +1040,8 @@ static struct sem_undo *find_undo(int se
 	ipc_rcu_getref(sma);
 	sem_unlock(sma);
 
-	new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+	new = (struct sem_undo *) ub_kmalloc(sizeof(struct sem_undo) +
+			sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
 		ipc_lock_by_ptr(&sma->sem_perm);
 		ipc_rcu_putref(sma);
@@ -1064,7 +1099,7 @@ asmlinkage long sys_semtimedop(int semid
 	if (nsops > sc_semopm)
 		return -E2BIG;
 	if(nsops > SEMOPM_FAST) {
-		sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
+		sops = ub_kmalloc(sizeof(*sops)*nsops, GFP_KERNEL);
 		if(sops==NULL)
 			return -ENOMEM;
 	}
@@ -1136,7 +1171,7 @@ retry_undos:
 	if (error)
 		goto out_unlock_free;
 
-	error = try_atomic_semop (sma, sops, nsops, un, current->tgid);
+	error = try_atomic_semop (sma, sops, nsops, un, virt_tgid(current));
 	if (error <= 0) {
 		if (alter && error == 0)
 			update_queue (sma);
@@ -1151,7 +1186,7 @@ retry_undos:
 	queue.sops = sops;
 	queue.nsops = nsops;
 	queue.undo = un;
-	queue.pid = current->tgid;
+	queue.pid = virt_tgid(current);
 	queue.id = semid;
 	queue.alter = alter;
 	if (alter)
@@ -1323,7 +1358,7 @@ found:
 					sem->semval = 0;
 				if (sem->semval > SEMVMX)
 					sem->semval = SEMVMX;
-				sem->sempid = current->tgid;
+				sem->sempid = virt_tgid(current);
 			}
 		}
 		sma->sem_otime = get_seconds();
@@ -1334,6 +1369,7 @@ next_entry:
 	}
 	kfree(undo_list);
 }
+EXPORT_SYMBOL(exit_sem);
 
 #ifdef CONFIG_PROC_FS
 static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
@@ -1383,3 +1419,60 @@ done:
 	return len;
 }
 #endif
+
+#ifdef CONFIG_VE
+void ve_sem_ipc_cleanup(void)
+{
+	int i;
+	struct sem_array *sma;
+
+	down(&sem_ids.sem);
+	for (i = 0; i <= sem_ids.max_id; i++) {
+		sma = sem_lock(i);
+		if (sma == NULL)
+			continue;
+		freeary(sma, i);
+	}
+	up(&sem_ids.sem);
+}
+
+int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg)
+{
+	int err = 0;
+	struct sem_array *sma;
+
+	down(&sem_ids.sem);
+	sma = sem_lock(semid);
+	if (!sma) {
+		err = newary(key, semid, size, semflg);
+		if (err >= 0)
+			sma = sem_lock(semid);
+	}
+	if (sma)
+		sem_unlock(sma);
+	up(&sem_ids.sem);
+
+	return err > 0 ? 0 : err;
+}
+EXPORT_SYMBOL(sysvipc_setup_sem);
+
+int sysvipc_walk_sem(int (*func)(int i, struct sem_array*, void *), void *arg)
+{
+	int i;
+	int err = 0;
+	struct sem_array *sma;
+
+	down(&sem_ids.sem);
+	for (i = 0; i <= sem_ids.max_id; i++) {
+		if ((sma = sem_lock(i)) == NULL)
+			continue;
+		err = func(sem_buildid(i,sma->sem_perm.seq), sma, arg);
+		sem_unlock(sma);
+		if (err)
+			break;
+	}
+	up(&sem_ids.sem);
+	return err;
+}
+EXPORT_SYMBOL(sysvipc_walk_sem);
+#endif
diff -Nurap linux-2.6.9-100.orig/ipc/shm.c linux-2.6.9-ve023stab054/ipc/shm.c
--- linux-2.6.9-100.orig/ipc/shm.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/ipc/shm.c	2011-06-15 19:26:22.000000000 +0400
@@ -28,6 +28,10 @@
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <asm/uaccess.h>
+#include <linux/module.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
 
 #include "util.h"
 
@@ -44,7 +48,7 @@ static struct ipc_ids shm_ids;
 #define shm_buildid(id, seq) \
 	ipc_buildid(&shm_ids, id, seq)
 
-static int newseg (key_t key, int shmflg, size_t size);
+static int newseg (key_t key, int shmid, int shmflg, size_t size);
 static void shm_open (struct vm_area_struct *shmd);
 static void shm_close (struct vm_area_struct *shmd);
 #ifdef CONFIG_PROC_FS
@@ -56,6 +60,28 @@ size_t 	shm_ctlall = SHMALL;
 int 	shm_ctlmni = SHMMNI;
 
 static int shm_tot; /* total number of shared memory pages */
+  
+void prepare_shm(void)
+{
+#ifdef CONFIG_VE
+	int i;
+	struct shmid_kernel* shp;
+
+	get_ve0()->_shm_ids = &shm_ids;
+	for (i = 0; i <= shm_ids.max_id; i++) {
+		shp = (struct shmid_kernel *)ipc_lock(&shm_ids, i);
+		if (shp != NULL) {
+			shp->_shm_ids = &shm_ids;
+			ipc_unlock(&shp->shm_perm);
+		}
+	}
+
+	get_ve0()->_shm_ctlmax = shm_ctlmax;
+	get_ve0()->_shm_ctlall = shm_ctlall;
+	get_ve0()->_shm_ctlmni = shm_ctlmni;
+	get_ve0()->_shm_tot = shm_tot;
+#endif
+}
 
 void __init shm_init (void)
 {
@@ -65,6 +91,42 @@ void __init shm_init (void)
 #endif
 }
 
+#ifdef CONFIG_VE
+#  define shm_ids	(*(get_exec_env()->_shm_ids))
+#  define shm_ctlmax	(get_exec_env()->_shm_ctlmax)
+#  define shm_ctlall	(get_exec_env()->_shm_ctlall)
+#  define shm_ctlmni	(get_exec_env()->_shm_ctlmni)
+/* renamed since there is a struct field named shm_tot */
+#  define shm_total	(get_exec_env()->_shm_tot)
+#else
+#  define shm_total	shm_tot
+#endif
+
+#ifdef CONFIG_VE
+void ve_shm_ipc_init (void)
+{
+	shm_ctlmax = SHMMAX;
+ 	shm_ctlall = SHMALL;
+ 	shm_ctlmni = SHMMNI;
+	shm_total = 0;
+	ve_ipc_init_ids(&shm_ids, 1);
+}
+#endif
+
+static struct shmid_kernel* shm_lock_by_sb(int id, struct super_block* sb)
+{
+	struct ve_struct *fs_envid;
+	fs_envid = VE_OWNER_FSTYPE(sb->s_type);
+	return (struct shmid_kernel *)ipc_lock(fs_envid->_shm_ids, id);
+}
+
+static inline int *shm_total_sb(struct super_block *sb)
+{
+	struct ve_struct *fs_envid;
+	fs_envid = VE_OWNER_FSTYPE(sb->s_type);
+	return &fs_envid->_shm_tot;
+}
+
 static inline int shm_checkid(struct shmid_kernel *s, int id)
 {
 	if (ipc_checkid(&shm_ids,&s->shm_perm,id))
@@ -72,25 +134,25 @@ static inline int shm_checkid(struct shm
 	return 0;
 }
 
-static inline struct shmid_kernel *shm_rmid(int id)
+static inline struct shmid_kernel *shm_rmid(struct ipc_ids *ids, int id)
 {
-	return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
+	return (struct shmid_kernel *)ipc_rmid(ids, id);
 }
 
-static inline int shm_addid(struct shmid_kernel *shp)
+static inline int shm_addid(struct shmid_kernel *shp, int reqid)
 {
-	return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni);
+	return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni, reqid);
 }
 
 
 
-static inline void shm_inc (int id) {
+static inline void shm_inc (int id, struct super_block * sb) {
 	struct shmid_kernel *shp;
 
-	if(!(shp = shm_lock(id)))
+	if(!(shp = shm_lock_by_sb(id, sb)))
 		BUG();
 	shp->shm_atim = get_seconds();
-	shp->shm_lprid = current->tgid;
+	shp->shm_lprid = virt_tgid(current);
 	shp->shm_nattch++;
 	shm_unlock(shp);
 }
@@ -98,7 +160,40 @@ static inline void shm_inc (int id) {
 /* This is called by fork, once for every shm attach. */
 static void shm_open (struct vm_area_struct *shmd)
 {
-	shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
+	shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino,
+			shmd->vm_file->f_dentry->d_inode->i_sb);
+}
+
+int shmem_lock(struct shmid_kernel *shp, int lock, struct user_struct *user)
+{
+	struct file *file = shp->shm_file;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	unsigned long size;
+
+	spin_lock(&info->lock);
+	if (lock && !(info->flags & VM_LOCKED)) {
+		size = shp->shm_segsz + PAGE_SIZE - 1;
+		if (ub_locked_mem_charge(shmid_ub(shp), size) < 0)
+			goto out_charge;
+		if (!user_shm_lock(inode->i_size, user))
+			goto out_nomem;
+		info->flags |= VM_LOCKED;
+	}
+	if (!lock && (info->flags & VM_LOCKED) && user) {
+		size = shp->shm_segsz + PAGE_SIZE - 1;
+		user_shm_unlock(inode->i_size, user);
+		ub_locked_mem_uncharge(shmid_ub(shp), size);
+		info->flags &= ~VM_LOCKED;
+	}
+	spin_unlock(&info->lock);
+	return 0;
+
+out_nomem:
+	ub_locked_mem_uncharge(shmid_ub(shp), size);
+out_charge:
+	spin_unlock(&info->lock);
+	return -ENOMEM;
 }
 
 /*
@@ -111,16 +206,27 @@ static void shm_open (struct vm_area_str
  */
 static void shm_destroy (struct shmid_kernel *shp)
 {
-	shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	shm_rmid (shp->id);
+	int numpages;
+	struct super_block *sb;
+	int *shm_totalp;
+	struct file *file;
+
+	file = shp->shm_file;
+	numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	sb = file->f_dentry->d_inode->i_sb;
+	shm_totalp = shm_total_sb(sb);
+	*shm_totalp -= numpages;
+	shm_rmid(shp->_shm_ids, shp->id);
 	shm_unlock(shp);
 	if (!is_file_hugepages(shp->shm_file))
-		shmem_lock(shp->shm_file, 0, shp->mlock_user);
+		shmem_lock(shp, 0, shp->mlock_user);
 	else
 		user_shm_unlock(shp->shm_file->f_dentry->d_inode->i_size,
 						shp->mlock_user);
-	fput (shp->shm_file);
+	fput (file);
 	security_shm_free(shp);
+	put_beancounter(shmid_ub(shp));
+	shmid_ub(shp) = NULL;
 	ipc_rcu_putref(shp);
 }
 
@@ -134,13 +240,25 @@ static void shm_close (struct vm_area_st
 {
 	struct file * file = shmd->vm_file;
 	int id = file->f_dentry->d_inode->i_ino;
+	struct super_block *sb;
 	struct shmid_kernel *shp;
+	struct ipc_ids* ids;
+#ifdef CONFIG_VE
+	struct ve_struct *fs_envid;
+#endif
 
-	down (&shm_ids.sem);
+	sb = file->f_dentry->d_inode->i_sb;
+#ifdef CONFIG_VE
+	fs_envid = get_ve(VE_OWNER_FSTYPE(sb->s_type));
+	ids = fs_envid->_shm_ids;
+#else
+	ids = &shm_ids;
+#endif
+	down (&ids->sem);
 	/* remove from the list of attaches of the shm segment */
-	if(!(shp = shm_lock(id)))
+	if(!(shp = shm_lock_by_sb(id, sb)))
 		BUG();
-	shp->shm_lprid = current->tgid;
+	shp->shm_lprid = virt_tgid(current);
 	shp->shm_dtim = get_seconds();
 	shp->shm_nattch--;
 	if(shp->shm_nattch == 0 &&
@@ -148,7 +266,10 @@ static void shm_close (struct vm_area_st
 		shm_destroy (shp);
 	else
 		shm_unlock(shp);
-	up (&shm_ids.sem);
+	up (&ids->sem);
+#ifdef CONFIG_VE
+	put_ve(fs_envid);
+#endif
 }
 
 static int shm_mmap(struct file * file, struct vm_area_struct * vma)
@@ -157,7 +278,8 @@ static int shm_mmap(struct file * file, 
 	vma->vm_ops = &shm_vm_ops;
 	if (!(vma->vm_flags & VM_WRITE))
 		vma->vm_flags &= ~VM_MAYWRITE;
-	shm_inc(file->f_dentry->d_inode->i_ino);
+	shm_inc(file->f_dentry->d_inode->i_ino,
+			file->f_dentry->d_inode->i_sb);
 	return 0;
 }
 
@@ -175,19 +297,19 @@ static struct vm_operations_struct shm_v
 #endif
 };
 
-static int newseg (key_t key, int shmflg, size_t size)
+static int newseg (key_t key, int shmid, int shmflg, size_t size)
 {
 	int error;
 	struct shmid_kernel *shp;
 	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
 	struct file * file;
-	char name[13];
+	char name[26];
 	int id;
 
 	if (size < SHMMIN || size > shm_ctlmax)
 		return -EINVAL;
 
-	if (shm_tot + numpages > shm_ctlall)
+	if (shm_total + numpages > shm_ctlall)
 		return -ENOSPC;
 
 	shp = ipc_rcu_alloc(sizeof(*shp));
@@ -210,7 +332,11 @@ static int newseg (key_t key, int shmflg
 		file = hugetlb_zero_setup(size);
 		shp->mlock_user = current->user;
 	} else {
+#ifdef CONFIG_VE
+		sprintf (name, "VE%d.SYSV%08x", get_exec_env()->veid, key);
+#else
 		sprintf (name, "SYSV%08x", key);
+#endif
 		file = shmem_file_setup(name, size, VM_ACCOUNT);
 	}
 	error = PTR_ERR(file);
@@ -218,24 +344,26 @@ static int newseg (key_t key, int shmflg
 		goto no_file;
 
 	error = -ENOSPC;
-	id = shm_addid(shp);
+	id = shm_addid(shp, shmid);
 	if(id == -1) 
 		goto no_id;
 
-	shp->shm_cprid = current->tgid;
+	shp->shm_cprid = virt_tgid(current);
 	shp->shm_lprid = 0;
 	shp->shm_atim = shp->shm_dtim = 0;
 	shp->shm_ctim = get_seconds();
 	shp->shm_segsz = size;
 	shp->shm_nattch = 0;
 	shp->id = shm_buildid(id,shp->shm_perm.seq);
+	shp->_shm_ids = &shm_ids;
 	shp->shm_file = file;
+	shmid_ub(shp) = get_beancounter(get_exec_ub());
 	file->f_dentry->d_inode->i_ino = shp->id;
 	if (shmflg & SHM_HUGETLB)
 		set_file_hugepages(file);
 	else
 		file->f_op = &shm_file_operations;
-	shm_tot += numpages;
+	shm_total += numpages;
 	shm_unlock(shp);
 	return shp->id;
 
@@ -254,12 +382,12 @@ asmlinkage long sys_shmget (key_t key, s
 
 	down(&shm_ids.sem);
 	if (key == IPC_PRIVATE) {
-		err = newseg(key, shmflg, size);
+		err = newseg(key, -1, shmflg, size);
 	} else if ((id = ipc_findkey(&shm_ids, key)) == -1) {
 		if (!(shmflg & IPC_CREAT))
 			err = -ENOENT;
 		else
-			err = newseg(key, shmflg, size);
+			err = newseg(key, -1, shmflg, size);
 	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
 		err = -EEXIST;
 	} else {
@@ -453,7 +581,7 @@ asmlinkage long sys_shmctl (int shmid, i
 		down(&shm_ids.sem);
 		shm_info.used_ids = shm_ids.in_use;
 		shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp);
-		shm_info.shm_tot = shm_tot;
+		shm_info.shm_tot = shm_total;
 		shm_info.swap_attempts = 0;
 		shm_info.swap_successes = 0;
 		err = shm_ids.max_id;
@@ -540,14 +668,15 @@ asmlinkage long sys_shmctl (int shmid, i
 		if(cmd==SHM_LOCK) {
 			struct user_struct * user = current->user;
 			if (!is_file_hugepages(shp->shm_file)) {
-				err = shmem_lock(shp->shm_file, 1, user);
-				if (!err) {
+				err = shmem_lock(shp, 1, user);
+				if (!err &&
+				    !(shp->shm_flags & SHM_LOCKED)) {
 					shp->shm_flags |= SHM_LOCKED;
 					shp->mlock_user = user;
 				}
 			}
 		} else if (!is_file_hugepages(shp->shm_file)) {
-			shmem_lock(shp->shm_file, 0, shp->mlock_user);
+			shmem_lock(shp, 0, shp->mlock_user);
 			shp->shm_flags &= ~SHM_LOCKED;
 			shp->mlock_user = NULL;
 		}
@@ -577,7 +706,7 @@ asmlinkage long sys_shmctl (int shmid, i
 
 		if (current->euid != shp->shm_perm.uid &&
 		    current->euid != shp->shm_perm.cuid && 
-		    !capable(CAP_SYS_ADMIN)) {
+		    !capable(CAP_VE_SYS_ADMIN)) {
 			err=-EPERM;
 			goto out_unlock_up;
 		}
@@ -616,7 +745,7 @@ asmlinkage long sys_shmctl (int shmid, i
 		err=-EPERM;
 		if (current->euid != shp->shm_perm.uid &&
 		    current->euid != shp->shm_perm.cuid && 
-		    !capable(CAP_SYS_ADMIN)) {
+		    !capable(CAP_VE_SYS_ADMIN)) {
 			goto out_unlock_up;
 		}
 
@@ -918,3 +1047,74 @@ done:
 	return len;
 }
 #endif
+
+#ifdef CONFIG_VE
+void ve_shm_ipc_cleanup(void)
+{
+	int i;
+
+	down(&shm_ids.sem);
+	for (i = 0; i <= shm_ids.max_id; i++) {
+		struct shmid_kernel *shp;
+
+		if (!(shp = shm_lock(i)))
+			continue;
+		if (shp->shm_nattch) {
+			shp->shm_flags |= SHM_DEST;
+			shp->shm_perm.key = IPC_PRIVATE;
+			shm_unlock(shp);
+		} else
+			shm_destroy(shp);
+	}
+	up(&shm_ids.sem);
+}
+#endif
+
+struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg)
+{
+	struct shmid_kernel *shp;
+	struct file *file;
+
+	down(&shm_ids.sem);
+	shp = shm_lock(shmid);
+	if (!shp) {
+		int err;
+
+		err = newseg(key, shmid, shmflg, size);
+		file = ERR_PTR(err);
+		if (err < 0)
+			goto out;
+		shp = shm_lock(shmid);
+	}
+	file = ERR_PTR(-EINVAL);
+	if (shp) {
+		file = shp->shm_file;
+		get_file(file);
+		shm_unlock(shp);
+	}
+out:
+	up(&shm_ids.sem);
+
+	return file;
+}
+EXPORT_SYMBOL(sysvipc_setup_shm);
+
+int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg)
+{
+	int i;
+	int err = 0;
+	struct shmid_kernel* shp;
+
+	down(&shm_ids.sem);
+	for(i = 0; i <= shm_ids.max_id; i++) {
+		if ((shp = shm_lock(i)) == NULL)
+			continue;
+		err = func(shp, arg);
+		shm_unlock(shp);
+		if (err)
+			break;
+	}
+	up(&shm_ids.sem);
+	return err;
+}
+EXPORT_SYMBOL(sysvipc_walk_shm);
diff -Nurap linux-2.6.9-100.orig/ipc/util.c linux-2.6.9-ve023stab054/ipc/util.c
--- linux-2.6.9-100.orig/ipc/util.c	2004-10-19 01:55:36.000000000 +0400
+++ linux-2.6.9-ve023stab054/ipc/util.c	2011-06-15 19:26:21.000000000 +0400
@@ -13,6 +13,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/shm.h>
 #include <linux/init.h>
@@ -27,8 +28,12 @@
 
 #include <asm/unistd.h>
 
+#include <ub/ub_mem.h>
+
 #include "util.h"
 
+DCL_VE_OWNER(IPCIDS, STATIC_SOFT, struct ipc_ids, owner_env, inline, ())
+
 /**
  *	ipc_init	-	initialise IPC subsystem
  *
@@ -55,7 +60,7 @@ __initcall(ipc_init);
  *	array itself. 
  */
  
-void __init ipc_init_ids(struct ipc_ids* ids, int size)
+void ve_ipc_init_ids(struct ipc_ids* ids, int size)
 {
 	int i;
 	sema_init(&ids->sem,1);
@@ -82,7 +87,25 @@ void __init ipc_init_ids(struct ipc_ids*
 	}
 	for(i=0;i<ids->size;i++)
 		ids->entries[i].p = NULL;
+#ifdef CONFIG_VE
+	SET_VE_OWNER_IPCIDS(ids, get_exec_env());
+#endif
+}
+
+void __init ipc_init_ids(struct ipc_ids* ids, int size)
+{
+	ve_ipc_init_ids(ids, size);
+}
+
+#ifdef CONFIG_VE
+static void ipc_free_ids(struct ipc_ids* ids)
+{
+	if (ids == NULL)
+		return;
+	ipc_rcu_putref(ids->entries);
+	kfree(ids);
 }
+#endif
 
 /**
  *	ipc_findkey	-	find a key in an ipc identifier set	
@@ -164,10 +187,20 @@ static int grow_ary(struct ipc_ids* ids,
  *	Called with ipc_ids.sem held.
  */
  
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
+int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid)
 {
 	int id;
 
+	if (reqid >= 0) {
+		id = reqid%SEQ_MULTIPLIER;
+		size = grow_ary(ids,id+1);
+		if (id >= size)
+			return -1;
+		if (ids->entries[id].p == NULL)
+			goto found;
+		return -1;
+	}
+
 	size = grow_ary(ids,size);
 
 	/*
@@ -180,6 +213,10 @@ int ipc_addid(struct ipc_ids* ids, struc
 	}
 	return -1;
 found:
+#ifdef CONFIG_VE
+	if (ids->in_use == 0)
+		(void)get_ve(VE_OWNER_IPCIDS(ids));
+#endif
 	ids->in_use++;
 	if (id > ids->max_id)
 		ids->max_id = id;
@@ -187,9 +224,13 @@ found:
 	new->cuid = new->uid = current->euid;
 	new->gid = new->cgid = current->egid;
 
-	new->seq = ids->seq++;
-	if(ids->seq > ids->seq_max)
-		ids->seq = 0;
+	if (reqid >= 0) {
+		new->seq = reqid/SEQ_MULTIPLIER;
+	} else {
+		new->seq = ids->seq++;
+		if(ids->seq > ids->seq_max)
+			ids->seq = 0;
+	}
 
 	new->lock = SPIN_LOCK_UNLOCKED;
 	new->deleted = 0;
@@ -237,6 +278,10 @@ struct kern_ipc_perm* ipc_rmid(struct ip
 		} while (ids->entries[lid].p == NULL);
 		ids->max_id = lid;
 	}
+#ifdef CONFIG_VE
+	if (ids->in_use == 0)
+		put_ve(VE_OWNER_IPCIDS(ids));
+#endif
 	p->deleted = 1;
 	return p;
 }
@@ -253,9 +298,9 @@ void* ipc_alloc(int size)
 {
 	void* out;
 	if(size > PAGE_SIZE)
-		out = vmalloc(size);
+		out = ub_vmalloc(size);
 	else
-		out = kmalloc(size, GFP_KERNEL);
+		out = ub_kmalloc(size, GFP_KERNEL);
 	return out;
 }
 
@@ -338,7 +383,7 @@ void* ipc_rcu_alloc(int size)
 	 * workqueue if necessary (for vmalloc). 
 	 */
 	if (rcu_use_vmalloc(size)) {
-		out = vmalloc(HDRLEN_VMALLOC + size);
+		out = ub_vmalloc(HDRLEN_VMALLOC + size);
 		if (out) {
 			out += HDRLEN_VMALLOC;
 			container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
@@ -563,6 +608,85 @@ int ipc_checkid(struct ipc_ids* ids, str
 	return 0;
 }
 
+#ifdef CONFIG_VE
+
+void prepare_ipc(void)
+{
+	/*
+	 * Note: we don't need to call SET_VE_OWNER_IPCIDS inside,
+	 * since we use static variables for ve0 (see STATIC_SOFT decl).
+	 */
+	prepare_msg();
+	prepare_sem();
+	prepare_shm();
+}
+
+int init_ve_ipc(struct ve_struct * envid)
+{
+	struct ve_struct * saved_envid;
+
+	envid->_msg_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *),
+							GFP_KERNEL);
+	if (envid->_msg_ids == NULL)
+		goto out_nomem;
+	envid->_sem_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *),
+							GFP_KERNEL);
+	if (envid->_sem_ids == NULL)
+		goto out_free_msg;
+	envid->_shm_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *),
+							GFP_KERNEL);
+	if (envid->_shm_ids == NULL)
+		goto out_free_sem;
+
+	/*
+	 * Bad style, but save a lot of code (charging to proper VE)
+	 * Here we temporary change VEID of the process involved in VE init.
+	 * The same is effect for ve_ipc_cleanup in real_do_env_cleanup().
+	 */
+	saved_envid = set_exec_env(envid);
+
+	ve_msg_ipc_init();
+	ve_sem_ipc_init();
+	ve_shm_ipc_init();
+
+	(void)set_exec_env(saved_envid);
+	return 0;
+
+out_free_sem:
+	kfree(envid->_sem_ids);
+out_free_msg:
+	kfree(envid->_msg_ids);
+out_nomem:
+	return -ENOMEM;
+}
+
+void ve_ipc_cleanup(void)
+{
+	ve_msg_ipc_cleanup();
+	ve_sem_ipc_cleanup();
+	ve_shm_ipc_cleanup();
+}
+
+void ve_ipc_free(struct ve_struct *envid)
+{
+	ipc_free_ids(envid->_msg_ids);
+	ipc_free_ids(envid->_sem_ids);
+	ipc_free_ids(envid->_shm_ids);
+	envid->_msg_ids = envid->_sem_ids = envid->_shm_ids = NULL;
+}
+
+void fini_ve_ipc(struct ve_struct *ptr)
+{
+	ve_ipc_cleanup();
+	ve_ipc_free(ptr);
+}
+
+EXPORT_SYMBOL(init_ve_ipc);
+EXPORT_SYMBOL(ve_ipc_cleanup);
+EXPORT_SYMBOL(ve_ipc_free);
+EXPORT_SYMBOL(fini_ve_ipc);
+#endif /* CONFIG_VE */
+
 #ifdef __ARCH_WANT_IPC_PARSE_VERSION
 
 
diff -Nurap linux-2.6.9-100.orig/ipc/util.h linux-2.6.9-ve023stab054/ipc/util.h
--- linux-2.6.9-100.orig/ipc/util.h	2004-10-19 01:54:32.000000000 +0400
+++ linux-2.6.9-ve023stab054/ipc/util.h	2011-06-15 19:26:20.000000000 +0400
@@ -15,6 +15,20 @@ void sem_init (void);
 void msg_init (void);
 void shm_init (void);
 
+#ifdef CONFIG_VE
+
+void ve_msg_ipc_init(void);
+void ve_sem_ipc_init(void);
+void ve_shm_ipc_init(void);
+void prepare_msg(void);
+void prepare_sem(void);
+void prepare_shm(void);
+void ve_msg_ipc_cleanup(void);
+void ve_sem_ipc_cleanup(void);
+void ve_shm_ipc_cleanup(void);
+
+#endif
+
 struct ipc_ids {
 	int size;
 	int in_use;
@@ -23,17 +37,21 @@ struct ipc_ids {
 	unsigned short seq_max;
 	struct semaphore sem;	
 	struct ipc_id* entries;
+	struct ve_struct *owner_env;
 };
 
+DCL_VE_OWNER_PROTO(IPCIDS, STATIC_SOFT, struct ipc_ids, owner_env, inline, ())
+
 struct ipc_id {
 	struct kern_ipc_perm* p;
 };
 
-void __init ipc_init_ids(struct ipc_ids* ids, int size);
+void ipc_init_ids(struct ipc_ids* ids, int size);
+void ve_ipc_init_ids(struct ipc_ids* ids, int size);
 
 /* must be called with ids->sem acquired.*/
 int ipc_findkey(struct ipc_ids* ids, key_t key);
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size);
+int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid);
 
 /* must be called with both locks acquired. */
 struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id);
diff -Nurap linux-2.6.9-100.orig/kernel/Kconfig.openvz linux-2.6.9-ve023stab054/kernel/Kconfig.openvz
--- linux-2.6.9-100.orig/kernel/Kconfig.openvz	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/Kconfig.openvz	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,73 @@
+# Copyright (C) 2005  SWsoft
+# All rights reserved.
+# Licensing governed by "linux/COPYING.SWsoft" file.
+
+config VE
+	bool "Virtual Environment support"
+	depends on !SECURITY
+	default y
+	help
+	  This option adds support for a virtual Linux running on the original box
+	  with a fully supported virtual network driver, tty subsystem and
+	  configurable access to hardware and other resources.
+
+config VE_CALLS
+	tristate "VE calls interface"
+	depends on VE
+	default m
+	help
+	  This option controls how to build the vzmon code containing VE calls.
+	  By default it is built as the module vzmon.o.
+
+config VZ_GENCALLS
+	bool
+	default y
+
+config VE_NETDEV
+	tristate "VE networking"
+	depends on VE
+	default m
+	help
+	  This option controls whether to build VE networking code.
+
+config VE_ETHDEV
+	tristate "Virtual ethernet device"
+	depends on VE_CALLS
+	default m
+	help
+	  This option controls whether to build virtual ethernet device.
+
+config VE_IPTABLES
+	bool "VE netfiltering"
+	depends on VE && VE_NETDEV && INET && NETFILTER
+	default y
+	help
+	  This option controls whether to build VE netfiltering code.
+
+config VZ_WDOG
+	tristate "VE watchdog module"
+	depends on VE
+	default m
+	help
+	  This option controls building of vzwdog module, which dumps
+	  a lot of useful system info on console periodically.
+
+config VZ_CHECKPOINT
+	tristate "Checkpointing & restoring Virtual Environments"
+	depends on SOFTWARE_SUSPEND
+	default m
+	help
+	  This option adds two modules, "cpt" and "rst", which allow
+	  saving a running Virtual Environment and restoring it
+	  on another host (live migration) or on the same host (checkpointing).
+
+config VZ_EVENT
+	tristate "Enable sending notifications of the VE status change through the netlink socket"
+	depends on VE && VE_CALLS && NET
+	default m
+	help
+	  This option provides for sending notifications of the VE
+	  events to the curious user space applications through
+	  the netlink socket just like the core kernel
+	  networking code does. For now, only notifications of
+	  the essential VE status changes are sent.
diff -Nurap linux-2.6.9-100.orig/kernel/audit.c linux-2.6.9-ve023stab054/kernel/audit.c
--- linux-2.6.9-100.orig/kernel/audit.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/audit.c	2011-06-15 19:26:22.000000000 +0400
@@ -288,9 +288,10 @@ int kauditd_thread(void *dummy)
 					BUG_ON(err != -ECONNREFUSED);
 					printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
 					read_lock(&tasklist_lock);
-					tsk = find_task_by_pid(audit_pid);
-					if (tsk && (tsk->flags & (PF_MEMDIE|PF_MEMALLOC)) == PF_MEMDIE)
-						tsk->flags &= ~PF_MEMDIE;
+					tsk = find_task_by_pid_all(audit_pid);
+					if (tsk && !(tsk->flags & PF_MEMALLOC))
+						clear_tsk_thread_flag(tsk,
+							       TIF_MEMDIE);
 					read_unlock(&tasklist_lock);
 
 					audit_pid = 0;
@@ -478,14 +479,16 @@ static int audit_receive_msg(struct sk_b
 			if (old != audit_pid) {
 				read_lock(&tasklist_lock);
 				if (old) {
-					tsk = find_task_by_pid(old);
-					if (tsk && (tsk->flags & (PF_MEMDIE|PF_MEMALLOC)) == PF_MEMDIE)
-						tsk->flags &= ~PF_MEMDIE;
+					tsk = find_task_by_pid_all(old);
+					if (tsk && !(tsk->flags & PF_MEMALLOC))
+						clear_tsk_thread_flag(tsk,
+							       TIF_MEMDIE);
 				}
 				if (audit_pid) {
-					tsk = find_task_by_pid(audit_pid);
+					tsk = find_task_by_pid_all(audit_pid);
 					if (tsk)
-						tsk->flags |= PF_MEMDIE;
+						set_tsk_thread_flag(tsk,
+							       TIF_MEMDIE);
 				}					
 				read_unlock(&tasklist_lock);
 			}
diff -Nurap linux-2.6.9-100.orig/kernel/capability.c linux-2.6.9-ve023stab054/kernel/capability.c
--- linux-2.6.9-100.orig/kernel/capability.c	2004-10-19 01:54:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/capability.c	2011-06-15 19:26:19.000000000 +0400
@@ -23,6 +23,7 @@ EXPORT_SYMBOL(cap_bset);
  * Locking rule: acquire this prior to tasklist_lock.
  */
 spinlock_t task_capability_lock = SPIN_LOCK_UNLOCKED;
+EXPORT_SYMBOL(task_capability_lock);
 
 /*
  * For sys_getproccap() and sys_setproccap(), any of the three
@@ -59,8 +60,8 @@ asmlinkage long sys_capget(cap_user_head
      spin_lock(&task_capability_lock);
      read_lock(&tasklist_lock); 
 
-     if (pid && pid != current->pid) {
-	     target = find_task_by_pid(pid);
+     if (pid && pid != virt_pid(current)) {
+	     target = find_task_by_pid_ve(pid);
 	     if (!target) {
 	          ret = -ESRCH;
 	          goto out;
@@ -90,11 +91,15 @@ static inline void cap_set_pg(int pgrp, 
 {
 	task_t *g, *target;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, g) {
+	pgrp = vpid_to_pid(pgrp);
+	if (pgrp < 0)
+		return;
+
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, g) {
 		target = g;
-		while_each_thread(g, target)
+		while_each_thread_ve(g, target)
 			security_capset_set(target, effective, inheritable, permitted);
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, g);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, g);
 }
 
 /*
@@ -107,11 +112,11 @@ static inline void cap_set_all(kernel_ca
 {
      task_t *g, *target;
 
-     do_each_thread(g, target) {
+     do_each_thread_ve(g, target) {
              if (target == current || target->pid == 1)
                      continue;
 	     security_capset_set(target, effective, inheritable, permitted);
-     } while_each_thread(g, target);
+     } while_each_thread_ve(g, target);
 }
 
 /*
@@ -157,8 +162,8 @@ asmlinkage long sys_capset(cap_user_head
      spin_lock(&task_capability_lock);
      read_lock(&tasklist_lock);
 
-     if (pid > 0 && pid != current->pid) {
-          target = find_task_by_pid(pid);
+     if (pid > 0 && pid != virt_pid(current)) {
+          target = find_task_by_pid_ve(pid);
           if (!target) {
                ret = -ESRCH;
                goto out;
diff -Nurap linux-2.6.9-100.orig/kernel/compat.c linux-2.6.9-ve023stab054/kernel/compat.c
--- linux-2.6.9-100.orig/kernel/compat.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/compat.c	2011-06-15 19:26:21.000000000 +0400
@@ -39,7 +39,7 @@ int put_compat_timespec(const struct tim
 			__put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
 }
 
-static long compat_nanosleep_restart(struct restart_block *restart)
+long compat_nanosleep_restart(struct restart_block *restart)
 {
 	unsigned long expire = restart->arg0, now = jiffies;
 	struct compat_timespec __user *rmtp;
@@ -67,6 +67,7 @@ static long compat_nanosleep_restart(str
 	/* The 'restart' block is already filled in */
 	return -ERESTART_RESTARTBLOCK;
 }
+EXPORT_SYMBOL(compat_nanosleep_restart);
 
 asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
 		struct compat_timespec __user *rmtp)
diff -Nurap linux-2.6.9-100.orig/kernel/configs.c linux-2.6.9-ve023stab054/kernel/configs.c
--- linux-2.6.9-100.orig/kernel/configs.c	2004-10-19 01:53:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/configs.c	2011-06-15 19:26:19.000000000 +0400
@@ -89,8 +89,7 @@ static int __init ikconfig_init(void)
 	struct proc_dir_entry *entry;
 
 	/* create the current config file */
-	entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO,
-				  &proc_root);
+	entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, NULL);
 	if (!entry)
 		return -ENOMEM;
 
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/Makefile linux-2.6.9-ve023stab054/kernel/cpt/Makefile
--- linux-2.6.9-100.orig/kernel/cpt/Makefile	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/Makefile	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,20 @@
+obj-$(CONFIG_VZ_CHECKPOINT) += vzcpt.o vzrst.o
+
+vzcpt-objs := cpt_proc.o cpt_dump.o cpt_obj.o cpt_context.o cpt_process.o \
+	cpt_mm.o cpt_pagein.o cpt_files.o cpt_kernel.o \
+	cpt_socket.o cpt_socket_in.o cpt_tty.o cpt_sysvipc.o cpt_net.o \
+	cpt_conntrack.o cpt_ubc.o cpt_epoll.o cpt_iterative.o
+
+vzrst-objs := rst_proc.o rst_undump.o cpt_obj.o rst_context.o rst_process.o \
+	rst_mm.o rst_pagein.o rst_files.o cpt_kernel.o \
+	rst_socket.o rst_socket_in.o rst_tty.o rst_sysvipc.o rst_net.o \
+	rst_conntrack.o rst_ubc.o rst_epoll.o rst_iterative.o
+
+ifeq ($(CONFIG_X86_64), y)
+vzcpt-objs += cpt_x8664.o
+vzrst-objs += cpt_x8664.o rst_x8664.o
+endif
+
+ifneq ($(CONFIG_X86_64), y)
+vzrst-objs += rst_i386.o
+endif
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_conntrack.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_conntrack.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_conntrack.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_conntrack.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,368 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/unistd.h>
+#include <linux/ve.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+
+#if defined(CONFIG_VE_IPTABLES) && \
+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+
+/* How does it work?
+ *
+ * Network is disabled, so new conntrack entries will not appear.
+ * However, some of them can disappear because of timeouts.
+ *
+ * So, we take read_lock, collect all required information atomically,
+ * essentially, creating parallel "refcount" structures holding pointers.
+ * We delete conntrack timers as well, so the structures cannot disappear
+ * after releasing the lock. Now, after releasing lock we can dump everything
+ * safely. And on exit we restore timers to their original values.
+ *
+ * Note, this approach is not going to work in VE0.
+ */
+
+struct ct_holder
+{
+	struct ct_holder *next;
+	struct ip_conntrack_tuple_hash *cth;
+	int index;
+};
+
+static void encode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple)
+{
+	v->cpt_dst = tuple->dst.ip;
+	v->cpt_dstport = tuple->dst.u.all;
+	v->cpt_protonum = tuple->dst.protonum;
+
+	v->cpt_src = tuple->src.ip;
+	v->cpt_srcport = tuple->src.u.all;
+}
+
+static void encode_nat_manip(struct cpt_nat_manip *v, struct ip_nat_info_manip *manip)
+{
+	v->cpt_direction = manip->direction;
+	v->cpt_hooknum = manip->hooknum;
+	v->cpt_maniptype = manip->maniptype;
+
+	v->cpt_manip_addr = manip->manip.ip;
+	v->cpt_manip_port = manip->manip.u.all;
+}
+
+static int dump_one_expect(struct cpt_ip_connexpect_image *v,
+			   struct ip_conntrack_expect *exp,
+			   int sibling, cpt_context_t *ctx)
+{
+	int err = 0;
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_NET_CONNTRACK_EXPECT;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	encode_tuple(&v->cpt_ct_tuple, &exp->ct_tuple);
+	encode_tuple(&v->cpt_tuple, &exp->tuple);
+	encode_tuple(&v->cpt_mask, &exp->mask);
+	v->cpt_sibling_conntrack = sibling;
+	v->cpt_seq = exp->seq;
+	if (sizeof(v->cpt_help) != sizeof(exp->help)) {
+		eprintk_ctx("conntrack module exp->help version mismatch\n");
+		return -EINVAL;
+	}
+	memcpy(v->cpt_help, &exp->help, sizeof(v->cpt_help));
+	v->cpt_timeout = 0;
+	if (exp->expectant->helper->timeout)
+		v->cpt_timeout = exp->timeout.expires - jiffies;
+	return err;
+}
+
+/* NOTE. We use one page to dump list of expectations. This may be not enough
+ * in theory. In practice there is only one expectation per conntrack record.
+ * Moreover, taking into account that _ALL_ of the expectations are saved in one
+ * global list, which is looked up for each incoming/outgoing packet, the system
+ * would be severely degraded if even one conntrack had that many
+ * expectations. In short, I am not going to repair this.
+ */
+
+static int dump_expect_list(struct ip_conntrack *ct, struct ct_holder *list,
+			    cpt_context_t *ctx)
+{
+	int err = 0;
+	unsigned long pg;
+	struct cpt_ip_connexpect_image *v;
+	struct ip_conntrack_expect *exp;
+
+	if (ct->expecting == 0)
+		return err;
+	if (ct->expecting*sizeof(struct cpt_ip_connexpect_image) > PAGE_SIZE)
+		return -ENOBUFS;
+
+	pg = __get_free_page(GFP_KERNEL);
+	if (!pg)
+		return -ENOMEM;
+	v = (struct cpt_ip_connexpect_image *)pg;
+
+	READ_LOCK(&ip_conntrack_lock);
+	list_for_each_entry(exp, &ct->sibling_list, expected_list) {
+		int sibling;
+
+		if (ct->helper == NULL) {
+			eprintk_ctx("conntrack: no helper and non-trivial expectation\n");
+			err = -EINVAL;
+			break;
+		}
+
+		if (exp->expectant != ct) {
+			eprintk_ctx("exp->expectant != ct\n");
+			err = -EINVAL;
+			break;
+		}
+
+		sibling = 0;
+		if (exp->sibling) {
+			struct ct_holder *c;
+			for (c = list; c; c = c->next) {
+				if (c->cth->ctrack == exp->sibling) {
+					sibling = c->index;
+					break;
+				}
+			}
+			/* NOTE: exp->sibling may not be "confirmed" and, hence,
+			 * out of the hash table. We should just ignore such a sibling,
+			 * the connection is going to be retried, the packet
+			 * apparently was lost somewhere.
+			 */
+			 if (sibling == 0)
+				 dprintk_ctx("sibling conntrack is not found\n");
+		}
+
+		/* If the expectation still does not have exp->sibling
+		 * and timer is not running, it is about to die on another
+		 * cpu. Skip it. */
+		if (!sibling &&
+		    ct->helper->timeout &&
+		    !timer_pending(&exp->timeout)) {
+			dprintk_ctx("conntrack: expectation: no timer\n");
+			continue;
+		}
+
+		err = dump_one_expect(v, exp, sibling, ctx);
+		if (err)
+			break;
+
+		v++;
+	}
+	READ_UNLOCK(&ip_conntrack_lock);
+
+	if (err == 0 && (unsigned long)v != pg)
+		ctx->write((void*)pg, (unsigned long)v - pg, ctx);
+
+	free_page(pg);
+	return err;
+}
+
+static int dump_one_ct(struct ct_holder *c, struct ct_holder *list,
+		       cpt_context_t *ctx)
+{
+	struct ip_conntrack_tuple_hash *h = c->cth;
+	struct ip_conntrack *ct = h->ctrack;
+	struct cpt_ip_conntrack_image v;
+	int err = 0;
+	int i;
+
+	if (sizeof(v.cpt_proto_data) != sizeof(ct->proto) + 4) {
+		eprintk_ctx("conntrack module ct->proto version mismatch\n");
+		return -EINVAL;
+	}
+	if (sizeof(v.cpt_help_data) != sizeof(ct->help)) {
+		eprintk_ctx("conntrack module ct->help version mismatch\n");
+		return -EINVAL;
+	}
+
+	cpt_open_object(NULL, ctx);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_CONNTRACK;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_ARRAY;
+
+	READ_LOCK(&ip_conntrack_lock);
+	v.cpt_status = ct->status;
+	v.cpt_timeout = ct->timeout.expires - jiffies;
+	v.cpt_ct_helper = (ct->helper != NULL);
+	v.cpt_index = c->index;
+	encode_tuple(&v.cpt_tuple[0], &ct->tuplehash[0].tuple);
+	encode_tuple(&v.cpt_tuple[1], &ct->tuplehash[1].tuple);
+	memcpy(&v.cpt_proto_data, &ct->proto, sizeof(v.cpt_proto_data));
+	memcpy(&v.cpt_help_data, &ct->help, sizeof(v.cpt_help_data));
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+	v.cpt_masq_index = ct->nat.masq_index;
+#endif
+	v.cpt_nat_helper = (ct->nat.info.helper != NULL);
+	v.cpt_initialized = ct->nat.info.initialized;
+	v.cpt_num_manips = ct->nat.info.num_manips;
+	v.cpt_initialized = ct->nat.info.initialized;
+	for (i=0; i<ct->nat.info.num_manips; i++)
+		encode_nat_manip(&v.cpt_nat_manips[i], &ct->nat.info.manips[i]);
+	v.cpt_nat_seq[0].cpt_correction_pos = ct->nat.info.seq[0].correction_pos;
+	v.cpt_nat_seq[0].cpt_offset_before = ct->nat.info.seq[0].offset_before;
+	v.cpt_nat_seq[0].cpt_offset_after = ct->nat.info.seq[0].offset_after;
+	v.cpt_nat_seq[1].cpt_correction_pos = ct->nat.info.seq[1].correction_pos;
+	v.cpt_nat_seq[1].cpt_offset_before = ct->nat.info.seq[1].offset_before;
+	v.cpt_nat_seq[1].cpt_offset_after = ct->nat.info.seq[1].offset_after;
+#else
+	v.cpt_initialized = 0;
+	v.cpt_nat_helper = 0;
+#endif
+	READ_UNLOCK(&ip_conntrack_lock);
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	err = dump_expect_list(ct, list, ctx);
+
+	cpt_close_object(ctx);
+	return err;
+}
+
+int cpt_dump_ip_conntrack(cpt_context_t * ctx)
+{
+	struct ct_holder *ct_list = NULL;
+	struct ct_holder *c, **cp;
+	int err = 0;
+	int index = 0;
+	int idx;
+
+	if (get_exec_env()->_ip_conntrack == NULL)
+		return 0;
+
+	for (idx = atomic_read(&(get_exec_env()->_ip_conntrack->_ip_conntrack_count)); idx >= 0; idx--) {
+		c = kmalloc(sizeof(struct ct_holder), GFP_KERNEL);
+		if (c == NULL) {
+			err = -ENOMEM;
+			goto done;
+		}
+		memset(c, 0, sizeof(struct ct_holder));
+		c->next = ct_list;
+		ct_list = c;
+	}
+
+	c = ct_list;
+
+	READ_LOCK(&ip_conntrack_lock);
+	for (idx = 0; idx < ip_conntrack_htable_size; idx++) {
+		struct ip_conntrack_tuple_hash *h;
+		list_for_each_entry(h, &ve_ip_conntrack_hash[idx], list) {
+			/* Skip reply tuples, they are covered by original
+			 * direction. */
+			if (DIRECTION(h))
+				continue;
+
+			/* Oops, we do not have enough holders...
+			 * This should be impossible. */
+			if (unlikely(c == NULL)) {
+				READ_UNLOCK(&ip_conntrack_lock);
+				eprintk_ctx("unexpected conntrack appeared\n");
+				err = -ENOMEM;
+				goto done;
+			}
+
+			/* If timer is not running, it means that it
+			 * has just been scheduled on another cpu.
+			 * We should skip this conntrack, it is about to be
+			 * destroyed. */
+			if (!del_timer(&h->ctrack->timeout)) {
+				dprintk_ctx("conntrack: no timer\n");
+				continue;
+			}
+
+			/* Timer is deleted. refcnt is _not_ decreased.
+			 * We are going to restore the timer on exit
+			 * from this function. */
+			c->cth = h;
+			c->index = ++index;
+			c = c->next;
+		}
+	}
+	READ_UNLOCK(&ip_conntrack_lock);
+
+	/* No conntracks? Good. */
+	if (index == 0)
+		goto done;
+
+	/* Comb the list a little. */
+	cp = &ct_list;
+	while ((c = *cp) != NULL) {
+		/* Discard unused entries; they can appear, if some
+		 * entries were timed out since we preallocated the list.
+		 */
+		if (c->cth == NULL) {
+			*cp = c->next;
+			kfree(c);
+			continue;
+		}
+
+		/* Move conntracks attached to expectations to the beginning
+		 * of the list. */
+		if (c->cth->ctrack->master && c != ct_list) {
+			*cp = c->next;
+			c->next = ct_list;
+			ct_list = c;
+			dprintk_ctx("conntrack: %d moved in list\n", c->index);
+			continue;
+		}
+		cp = &c->next;
+	}
+
+	cpt_open_section(ctx, CPT_SECT_NET_CONNTRACK);
+
+	for (c = ct_list; c; c = c->next) {
+		err = dump_one_ct(c, ct_list, ctx);
+		if (err)
+			goto done;
+	}
+
+	cpt_close_section(ctx);
+
+done:
+	while ((c = ct_list) != NULL) {
+		ct_list = c->next;
+		if (c->cth) {
+			/* Restore timer. refcnt is preserved. */
+			add_timer(&c->cth->ctrack->timeout);
+		}
+		kfree(c);
+	}
+	return err;
+}
+
+#endif
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_context.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_context.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_context.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_context.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,238 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+
+static void file_write(const void *addr, size_t count, struct cpt_context *ctx)
+{
+	mm_segment_t oldfs;
+	ssize_t err = -EBADF;
+	struct file *file = ctx->file;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if (file)
+		err = file->f_op->write(file, addr, count, &file->f_pos);
+	set_fs(oldfs);
+	if (err != count && !ctx->write_error)
+		ctx->write_error = err < 0 ? err : -EIO;
+}
+
+static void file_pwrite(void *addr, size_t count, struct cpt_context *ctx, loff_t pos)
+{
+	mm_segment_t oldfs;
+	ssize_t err = -EBADF;
+	struct file *file = ctx->file;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if (file)
+		err = file->f_op->write(file, addr, count, &pos);
+	set_fs(oldfs);
+	if (err != count && !ctx->write_error)
+		ctx->write_error = err < 0 ? err : -EIO;
+}
+
+static void file_align(struct cpt_context *ctx)
+{
+	struct file *file = ctx->file;
+
+	if (file)
+		file->f_pos = CPT_ALIGN(file->f_pos);
+}
+
+void cpt_context_init(struct cpt_context *ctx)
+{
+	int i;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	init_MUTEX(&ctx->main_sem);
+	ctx->refcount = 1;
+
+	ctx->current_section = -1;
+	ctx->current_object = -1;
+	ctx->pagesize = PAGE_SIZE;
+	ctx->write = file_write;
+	ctx->pwrite = file_pwrite;
+	ctx->align = file_align;
+	for (i=0; i < CPT_SECT_MAX; i++)
+		ctx->sections[i] = CPT_NULL;
+	init_completion(&ctx->pgin_notify);
+	cpt_object_init(ctx);
+}
+
+int cpt_open_dumpfile(struct cpt_context *ctx)
+{
+	ctx->tmpbuf = (char*)__get_free_page(GFP_KERNEL);
+	if (ctx->tmpbuf == NULL)
+		return -ENOMEM;
+	__cpt_release_buf(ctx);
+	return 0;
+}
+
+int cpt_close_dumpfile(struct cpt_context *ctx)
+{
+	if (ctx->file) {
+		fput(ctx->file);
+		ctx->file = NULL;
+	}
+	if (ctx->tmpbuf) {
+		free_page((unsigned long)ctx->tmpbuf);
+		ctx->tmpbuf = NULL;
+	}
+	if (ctx->write_error)
+		eprintk_ctx("error while writing dump file: %d\n", ctx->write_error);
+	return ctx->write_error;
+}
+
+int cpt_major_hdr_out(struct cpt_context *ctx)
+{
+	struct cpt_major_hdr hdr;
+
+	if (ctx->file == NULL)
+		return 0;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.cpt_signature[0] = CPT_SIGNATURE0;
+	hdr.cpt_signature[1] = CPT_SIGNATURE1;
+	hdr.cpt_signature[2] = CPT_SIGNATURE2;
+	hdr.cpt_signature[3] = CPT_SIGNATURE3;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_image_version = CPT_VERSION_9_1;
+#ifdef CONFIG_X86_64
+	hdr.cpt_os_arch = CPT_OS_ARCH_EMT64;
+#else
+	hdr.cpt_os_arch = CPT_OS_ARCH_I386;
+#endif
+	hdr.cpt_ve_features = (__u32)ctx->features;
+	hdr.cpt_ve_features2 = (__u32)(ctx->features>>32);
+	hdr.cpt_pagesize = PAGE_SIZE;
+	hdr.cpt_hz = HZ;
+	hdr.cpt_start_jiffies64 = ctx->virt_jiffies64;
+	hdr.cpt_start_sec = ctx->start_time.tv_sec;
+	hdr.cpt_start_nsec = ctx->start_time.tv_nsec;
+	hdr.cpt_cpu_caps[0] = ctx->src_cpu_flags;
+	hdr.cpt_kernel_config[0] = ctx->kernel_config_flags;
+	hdr.cpt_iptables_mask = ctx->iptables_mask;
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	return 0;
+}
+
+int cpt_close_section(struct cpt_context *ctx)
+{
+	if (ctx->file && ctx->current_section >= 0) {
+		__u64 next = ctx->file->f_pos - ctx->current_section;
+		ctx->pwrite(&next, 8, ctx, ctx->current_section);
+		ctx->current_section = -1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cpt_close_section);
+
+int cpt_open_section(struct cpt_context *ctx, __u32 type)
+{
+	struct cpt_section_hdr hdr;
+
+	if (ctx->file == NULL)
+		return 0;
+
+	cpt_close_section(ctx);
+
+	ctx->current_section = ctx->file->f_pos;
+	ctx->sections[type] = ctx->current_section;
+
+	hdr.cpt_next = 0;
+	hdr.cpt_section = type;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_align = 0;
+	ctx->write(&hdr, sizeof(hdr), ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL(cpt_open_section);
+
+
+int cpt_close_object(struct cpt_context *ctx)
+{
+	if (ctx->file && ctx->current_object >= 0) {
+		__u64 next = ctx->file->f_pos - ctx->current_object;
+		ctx->pwrite(&next, 8, ctx, ctx->current_object);
+		ctx->current_object = -1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cpt_close_object);
+
+int cpt_open_object(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	if (ctx->file == NULL)
+		return 0;
+
+	cpt_close_object(ctx);
+
+	ctx->current_object = ctx->file->f_pos;
+	if (obj)
+		cpt_obj_setpos(obj, ctx->current_object, ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL(cpt_open_object);
+
+int cpt_push_object(loff_t *saved, struct cpt_context *ctx)
+{
+	if (ctx->file) {
+		*saved = ctx->current_object;
+		ctx->current_object = ctx->file->f_pos;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cpt_push_object);
+
+int cpt_pop_object(loff_t *saved, struct cpt_context *ctx)
+{
+	ctx->current_object = *saved;
+	return 0;
+}
+EXPORT_SYMBOL(cpt_pop_object);
+
+int cpt_dump_tail(struct cpt_context *ctx)
+{
+	struct cpt_major_tail hdr;
+	int i;
+
+	if (ctx->file == NULL)
+		return 0;
+
+	cpt_open_section(ctx, CPT_SECT_TRAILER);
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.cpt_next = sizeof(hdr);
+	hdr.cpt_object = CPT_OBJ_TRAILER;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = CPT_CONTENT_VOID;
+	hdr.cpt_lazypages = ctx->lazypages;
+	hdr.cpt_64bit = ctx->tasks64;
+	hdr.cpt_signature[0] = CPT_SIGNATURE0;
+	hdr.cpt_signature[1] = CPT_SIGNATURE1;
+	hdr.cpt_signature[2] = CPT_SIGNATURE2;
+	hdr.cpt_signature[3] = CPT_SIGNATURE3;
+	hdr.cpt_nsect = CPT_SECT_MAX_INDEX;
+	for (i = 0; i < CPT_SECT_MAX_INDEX; i++)
+		hdr.cpt_sections[i] = ctx->sections[i];
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	cpt_close_section(ctx);
+	return 0;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_context.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_context.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_context.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_context.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,205 @@
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+#include <ub/beancounter.h>
+
+#define	CPT_CTX_ERROR		-1
+#define	CPT_CTX_IDLE		0
+#define CPT_CTX_SUSPENDING	1
+#define	CPT_CTX_SUSPENDED	2
+#define CPT_CTX_DUMPING		3
+#define CPT_CTX_UNDUMPING	4
+#define CPT_CTX_UNDUMPED	5
+
+#define CPT_TID(tsk)   (tsk)->pid, virt_pid(tsk), (tsk)->comm
+#define CPT_FID		"%d,%d(%s)"
+
+
+typedef struct cpt_context
+{
+	struct list_head ctx_list;
+	int	refcount;
+	int	ctx_state;
+	int	objcount;
+	int	sticky;
+	struct semaphore main_sem;
+
+	struct file *errorfile;
+	struct file *statusfile;
+	struct file *lockfile;
+
+	int	errno;
+	char	*error_msg;
+	loff_t	err_offset;
+
+	struct file	*file;
+	struct file	*pagein_file_in;
+	struct file	*pagein_file_out;
+	char		*tmpbuf;
+	int		pagesize;
+
+	int		iter_done;
+	void		*iter_dir;
+	struct user_beancounter *iter_ub;
+
+	loff_t		current_section;
+	loff_t		current_object;
+
+	loff_t		sections[CPT_SECT_MAX];
+
+	__u32		errormask;
+	__u32		write_error;
+
+	struct list_head object_array[CPT_OBJ_MAX];
+
+	void		(*write)(const void *addr, size_t count, struct cpt_context *ctx);
+	void		(*pwrite)(void *addr, size_t count, struct cpt_context *ctx, loff_t pos);
+	ssize_t		(*read)(void *addr, size_t count, struct cpt_context *ctx);
+	ssize_t		(*pread)(void *addr, size_t count, struct cpt_context *ctx, loff_t pos);
+	void		(*align)(struct cpt_context *ctx);
+	int		ve_id;
+	int		contextid;
+	__u64		cpt_jiffies64; 	/* Host jiffies64 at the moment of cpt/rst,
+					 * corresponding to start_time */
+	__u64		virt_jiffies64;	/* Virtual jiffies64. It is == cpt_jiffies64 when
+					 * VE did not migrate. */
+	struct timespec	start_time;
+	struct timespec delta_time;
+	__s64		delta_nsec;
+	int		image_version;
+	__u64		iptables_mask;
+	__u64		features;
+
+#define CPT_ANONVMA_HBITS (sizeof(void*) == 4 ? 10 : 9)
+#define CPT_ANONVMA_HSIZE (1<<CPT_ANONVMA_HBITS)
+	struct hlist_head *anonvmas;
+	int		lazy_vm;
+	int		lazypages;
+	int		tasks64;
+	int		lazytype;
+	task_t		*pgin_task;
+	unsigned long	last_pagein;
+	__u32		src_cpu_flags;
+	__u32		dst_cpu_flags;
+	__u32		kernel_config_flags;
+	struct pagein_desc	**pgin_dir;
+	struct pgin_device	*pagein_dev;
+	struct completion	pgin_notify;
+	struct completion	*pgind_completion;
+	struct swap_info_struct	*pgin_swp;
+
+	struct filejob  *filejob_queue;
+
+	int		slm_count;
+
+	char *vdso;
+
+	/* Store here ubc limits and barriers during undumping,
+	   and restore them before resuming */
+	struct ubparm	saved_ubc[UB_RESOURCES];
+} cpt_context_t;
+
+typedef struct {
+	int pid;
+	cpt_context_t *ctx;
+	struct completion done;
+} pagein_info_t;
+
+int pagein_info_printf(char *buf, cpt_context_t *ctx);
+
+int cpt_open_dumpfile(struct cpt_context *);
+int cpt_close_dumpfile(struct cpt_context *);
+int rst_open_dumpfile(struct cpt_context *);
+void rst_close_dumpfile(struct cpt_context *);
+void cpt_context_init(struct cpt_context *);
+void rst_context_init(struct cpt_context *);
+void cpt_context_destroy(struct cpt_context *);
+
+void rst_report_error(int err, cpt_context_t *ctx);
+
+
+int cpt_major_hdr_out(struct cpt_context *ctx);
+int cpt_dump_tail(struct cpt_context *ctx);
+int cpt_close_section(struct cpt_context *ctx);
+int cpt_open_section(struct cpt_context *ctx, __u32 type);
+int cpt_close_object(struct cpt_context *ctx);
+int cpt_open_object(cpt_object_t *obj, struct cpt_context *ctx);
+int cpt_push_object(loff_t *saved, struct cpt_context *ctx);
+int cpt_pop_object(loff_t *saved, struct cpt_context *ctx);
+
+int rst_get_section(int type, struct cpt_context * ctx, loff_t *, loff_t *);
+__u8 *__rst_get_name(loff_t *pos_p, struct cpt_context *ctx);
+__u8 *rst_get_name(loff_t pos, struct cpt_context *ctx);
+void rst_put_name(__u8 *name, struct cpt_context *ctx);
+int _rst_get_object(int type, loff_t pos, void *tmp, int size, struct cpt_context *ctx);
+void * __rst_get_object(int type, loff_t pos, struct cpt_context *ctx);
+
+#define rst_get_object(type, pos, tmp, ctx) \
+ _rst_get_object((type), (pos), (tmp), sizeof(*(tmp)), (ctx))
+
+extern int debug_level;
+
+#define cpt_printk(lvl, fmt, args...)	do {	\
+		if (lvl <= debug_level)		\
+			printk(fmt, ##args);	\
+	} while (0)
+
+#define dprintk(a...) cpt_printk(3, "CPT DBG: " a)
+#define dprintk_ctx(f, arg...) dprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg)
+
+#define wprintk(a...) cpt_printk(2, "CPT WRN: " a)
+#define wprintk_ctx(f, arg...) wprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg)
+
+#define eprintk(a...) cpt_printk(1, "CPT ERR: " a)
+#define eprintk_ctx(f, arg...)						\
+do {									\
+	eprintk("%p,%u :" f, ctx, ctx->ve_id, ##arg);			\
+	if (ctx->error_msg && ctx->err_offset < PAGE_SIZE)		\
+		ctx->err_offset += snprintf((char*)(ctx->error_msg +	\
+				ctx->err_offset),			\
+			       	PAGE_SIZE - ctx->err_offset, f, ##arg);	\
+} while(0)
+
+#define CPT_TMPBUF_FREE 0x789adf12
+#define CPT_TMPBUF_BUSY 0xabcd9876
+
+static inline void *cpt_get_buf(cpt_context_t *ctx)
+{
+	void *buf = ctx->tmpbuf;
+
+	BUG_ON(*(u32*)(buf + PAGE_SIZE - 4) != CPT_TMPBUF_FREE);
+	*(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_BUSY;
+	return buf;
+}
+
+static inline void __cpt_release_buf(cpt_context_t *ctx)
+{
+	void *buf = ctx->tmpbuf;
+
+	*(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_FREE;
+}
+
+static inline void cpt_release_buf(cpt_context_t *ctx)
+{
+	void *buf = ctx->tmpbuf;
+
+	BUG_ON(*(u32*)(buf + PAGE_SIZE - 4) != CPT_TMPBUF_BUSY);
+	*(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_FREE;
+}
+
+static inline void cpt_flush_error(cpt_context_t *ctx)
+{
+	mm_segment_t oldfs;
+
+	if (ctx->errorfile && ctx->error_msg && ctx->err_offset) {
+		if (ctx->errorfile->f_op && ctx->errorfile->f_op->write) {
+			oldfs = get_fs();
+			set_fs(KERNEL_DS);
+			ctx->errorfile->f_op->write(ctx->errorfile,
+				ctx->error_msg, ctx->err_offset,
+				&ctx->errorfile->f_pos);
+			set_fs(oldfs);
+		}
+		ctx->error_msg[0] = 0;
+		ctx->err_offset = 0;
+	}
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_dump.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_dump.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_dump.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_dump.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,1102 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/ptrace.h>
+#include <linux/smp_lock.h>
+#include <linux/ve.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+#include <ub/ub_task.h>
+#include <linux/cpt_image.h>
+#include <linux/namespace.h>
+#include <linux/netdevice.h>
+#include <linux/nfcalls.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_dump.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_process.h"
+#include "cpt_net.h"
+#include "cpt_socket.h"
+#include "cpt_ubc.h"
+#include "cpt_kernel.h"
+
+
+static int vps_child_level(task_t *root, task_t *c)
+{
+	int level = 0;
+	int veid = VE_TASK_INFO(c)->owner_env->veid;
+
+	while (VE_TASK_INFO(c)->owner_env->veid == veid) {
+		if (c->pid != c->tgid)
+			c = c->group_leader;
+		if (c == root)
+			return level;
+
+		c = c->real_parent;
+		level++;
+	}
+	return -1;
+}
+
+static inline int freezable(struct task_struct * p)
+{
+	if (p->exit_state)
+		return 0;
+
+	switch (p->state) {
+	case EXIT_ZOMBIE:
+	case EXIT_DEAD:
+	case TASK_STOPPED:
+#if TASK_TRACED != TASK_STOPPED
+	case TASK_TRACED:
+#endif
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+static void wake_ve(cpt_context_t *ctx)
+{
+	task_t *p, *g;
+
+	do_each_thread_ve(g, p) {
+		spin_lock_irq(&p->sighand->siglock);
+		if (p->flags & PF_FROZEN) {
+			p->flags &= ~PF_FROZEN;
+			wake_up_process(p);
+		}
+		spin_unlock_irq(&p->sighand->siglock);
+	} while_each_thread_ve(g, p);
+}
+
+/*
+ * Some comment is necessary about PF_FREEZE,PF_FROZEN,TIF_FREEZE...
+ *
+ * SWSUSP uses PF_FREEZE flag in tsk->flags raising it in context
+ * of another process. Apparently, it is unacceptable on SMP.
+ * Let's take freeze_processes() in kernel/power/process.c as an example.
+ * Unserialized modifications of tsk->flags easily
+ * (believe or not, but it happens with probability of almost 100% :-))
+ * creates the situation when setting PF_FREEZE in freeze_processes(),
+ * which quickly spins raising PF_FREEZE of all the processes,
+ * _clears_ PF_FROZEN just set in refrigerator(), so that suspend deadlocks.
+ *
+ * So, to make things clean, we require that those flags may be modified
+ * only under tsk->sighand->siglock, which is quite natural because PF_FREEZE
+ * is just a kind of signal.
+ *
+ * It is not enough, because we are still not allowed to change tsk->flags
+ * in context of another process, we can corrupt other flags, when the process
+ * running on another cpu modifies them. So, we use TIF_FREEZE in thread flags,
+ * which can be changed atomically.
+ *
+ * PF_FROZEN also changes in context of another process, but this happens
+ * only when the process is already in refrigerator() which does not modify
+ * tsk->flags.
+ */
+
+static int vps_stop_tasks(struct cpt_context *ctx)
+{
+	unsigned long start_time = jiffies;
+	task_t *p, *g;
+	int todo;
+	int round = 0;
+
+	do_gettimespec(&ctx->start_time); 
+	ctx->cpt_jiffies64 = get_jiffies_64();
+	ctx->virt_jiffies64 = ctx->cpt_jiffies64 + get_exec_env()->jiffies_fixup;
+
+	read_lock(&tasklist_lock);
+
+	atomic_inc(&get_exec_env()->suspend);
+
+	for(;;) {
+		task_t *root;
+		todo = 0;
+
+		root = find_task_by_pid_ve(1);
+		if (!root) {
+			read_unlock(&tasklist_lock);
+			eprintk_ctx("cannot find ve init\n");
+			atomic_dec(&get_exec_env()->suspend);
+			return -ESRCH;
+		}
+
+		do_each_thread_ve(g, p) {
+			if (vps_child_level(root, p) >= 0) {
+				if (!is_virtual_pid(virt_pid(p))) {
+					eprintk_ctx("external process %d/%d(%s) inside VPS (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm);
+					todo = -1;
+					goto out;
+				}
+				if (!is_virtual_pid(virt_pgid(p))) {
+					eprintk_ctx("external process group %d/%d(%s) inside VPS (e.g. vzctl enter or vzctl exec).\n", virt_pgid(p), p->pid, p->comm);
+					todo = -1;
+					goto out;
+				}
+				if (!is_virtual_pid(virt_sid(p))) {
+					eprintk_ctx("external process session %d/%d(%s) inside VPS (e.g. vzctl enter or vzctl exec).\n", virt_sid(p), p->pid, p->comm);
+					todo = -1;
+					goto out;
+				}
+				if (task_aux(p)->vfork_done) {
+					/* Task between vfork()...exec()
+					 * cannot be frozen, because parent
+					 * wait in uninterruptible state.
+					 * So, we do nothing, waiting for
+					 * exec(), unless:
+					 */
+					if (p->state == TASK_STOPPED ||
+					    p->state == TASK_TRACED) {
+						eprintk_ctx("task %d/%d(%s) is stopped while vfork(). Checkpointing is impossible.\n", virt_pid(p), p->pid, p->comm);
+						todo = -1;
+						/* It is fatal, _user_ stopped
+						 * vfork()ing task, so that we
+						 * cannot suspend now.
+						 */
+					} else {
+						todo = -3;
+					}
+					goto out;
+				}
+				if (p->state == TASK_TRACED
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
+				    && !p->stopped_state
+#endif
+				    ) {
+					int ptrace_id = p->pn_state;
+					/* Debugger waits for signal. */
+					switch (ptrace_id) {
+					case PN_STOP_TF:
+					case PN_STOP_TF_RT:
+					case PN_STOP_ENTRY:
+					case PN_STOP_FORK:
+					case PN_STOP_VFORK:
+					case PN_STOP_SIGNAL:
+					case PN_STOP_EXIT:
+					case PN_STOP_LEAVE:
+						break;
+					default:
+						eprintk_ctx("task %d/%d(%s) is stopped by debugger while %d.\n", virt_pid(p), p->pid, p->comm, ptrace_id);
+						todo = -1;
+						goto out;
+					}
+				}
+				if (p->flags & PF_NOFREEZE) {
+					todo = -1;
+					goto out;
+				}
+
+				if (!freezable(p))
+					continue;
+
+				spin_lock_irq(&p->sighand->siglock);
+				if (!(p->flags & PF_FROZEN)) {
+					set_tsk_thread_flag(p, TIF_FREEZE);
+					signal_wake_up(p, 0);
+				}
+				spin_unlock_irq(&p->sighand->siglock);
+
+				if (p->flags & PF_FROZEN) {
+					if (p->state != TASK_UNINTERRUPTIBLE)
+						printk("BUG: Holy Crap 1 %ld " CPT_FID "\n", p->state, CPT_TID(p));
+					continue;
+				}
+
+				if (round == 10)
+					wprintk_ctx("%d/%d(%s) is running\n", virt_pid(p), p->pid, p->comm);
+
+				todo++;
+			} else {
+				if (p != current) {
+					eprintk_ctx("foreign process %d/%d(%s) inside VPS (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm);
+					todo = -1;
+					goto out;
+				}
+			}
+		} while_each_thread_ve(g, p);
+
+out:
+		if (todo &&
+		    (time_after(jiffies, start_time + 10*HZ) ||
+		     signal_pending(current) || todo < 0)) {
+			atomic_dec(&get_exec_env()->suspend);
+			wake_ve(ctx);
+			if (todo > 0)
+				todo = -2;
+#if 0
+			/* This is sign of failure of printk(), which is not
+			 * ours. So, no prefixes. */
+			printk(">\n");
+#endif
+		}
+
+		read_unlock(&tasklist_lock);
+
+		if (!todo) {
+			atomic_dec(&get_exec_env()->suspend);
+			return 0;
+		}
+
+		if (todo == -1) {
+			eprintk_ctx("suspend is impossible now.\n");
+			return -EAGAIN;
+		}
+
+		if (todo == -2) {
+			eprintk_ctx("interrupted or timed out.\n");
+			return -EINTR;
+		}
+
+		if (todo == -3) {
+			if (time_after(jiffies, start_time + 10*HZ) ||
+			    signal_pending(current)) {
+				eprintk_ctx("vfork() is active, suspend is impossible now.\n");
+				return -EAGAIN;
+			}
+		}
+
+		if (todo < 0)
+			atomic_inc(&get_exec_env()->suspend);
+
+		if (todo < 0 || round > 0) {
+			current->state = TASK_INTERRUPTIBLE;
+			schedule_timeout(HZ/50);
+		} else {
+			yield();
+		}
+
+		read_lock(&tasklist_lock);
+		round++;
+	}
+}
+
+static int cpt_unlock_ve(struct cpt_context *ctx)
+{
+	struct ve_struct *env;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+	down_write(&env->op_sem);
+	env->is_locked = 0;
+	up_write(&env->op_sem);
+	put_ve(env);
+	return 0;
+}
+
+int cpt_resume(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	virtinfo_notifier_call(VITYPE_SCP, VIRTINFO_SCP_DMPFIN, ctx);
+
+	cpt_unlock_sockets(ctx);
+
+	if (ctx->pgin_task) {
+		wait_for_completion(&ctx->pgin_notify);
+		put_task_struct(ctx->pgin_task);
+		ctx->pgin_task = NULL;
+	}
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+
+		if (tsk->exit_state) {
+			put_task_struct(tsk);
+			continue;
+		}
+
+		spin_lock_irq(&tsk->sighand->siglock);
+		if (tsk->flags & PF_FROZEN) {
+			tsk->flags &= ~PF_FROZEN;
+			wake_up_process(tsk);
+		} else if (freezable(tsk)) {
+			eprintk_ctx("strange, %s not frozen\n", tsk->comm );
+		}
+		spin_unlock_irq(&tsk->sighand->siglock);
+		put_task_struct(tsk);
+	}
+
+	cpt_resume_network(ctx);
+
+	cpt_unlock_ve(ctx);
+
+	cpt_finish_ubc(ctx);
+	cpt_object_destroy(ctx);
+	return 0;
+}
+
+int cpt_kill(struct cpt_context *ctx)
+{
+	int err = 0;
+	struct ve_struct *env;
+	cpt_object_t *obj;
+	task_t *root_task = NULL;
+	long delay;
+
+	if (!ctx->ve_id)
+		return -EINVAL;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+
+	/* from here cpt_kill succeeds */
+	virtinfo_notifier_call(VITYPE_SCP, VIRTINFO_SCP_DMPFIN, ctx);
+
+	if (VE_TASK_INFO(current)->owner_env == env) {
+		wprintk_ctx("attempt to kill ve from inside, escaping...\n");
+
+		write_lock_irq(&tasklist_lock);
+		VE_TASK_INFO(current)->owner_env = get_ve0();
+		REMOVE_VE_LINKS(current);
+		SET_VE_LINKS(current);
+
+		atomic_inc(&get_ve0()->pcounter);
+		atomic_dec(&env->pcounter);
+		write_unlock_irq(&tasklist_lock);
+		set_exec_env(get_ve0());
+	}
+
+	if (ctx->pgin_task) {
+		wait_for_completion(&ctx->pgin_notify);
+		put_task_struct(ctx->pgin_task);
+		ctx->pgin_task = NULL;
+	}
+
+	cpt_kill_sockets(ctx);
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+
+		if (tsk->exit_state) {
+			put_task_struct(tsk);
+			continue;
+		}
+
+#ifndef CONFIG_X86_64
+		/* Write bogus value into eip, the task must not reach it,
+		 * this can happen only as result of a bug in signal delivery.
+		 */
+		task_pt_regs(tsk)->eip = 0x1234;
+#endif
+
+		if (virt_pid(tsk) == 1) {
+			root_task = tsk;
+			continue;
+		}
+
+		if (tsk->ptrace) {
+			write_lock_irq(&tasklist_lock);
+			tsk->ptrace = 0;
+			if (!list_empty(&tsk->ptrace_list)) {
+				list_del_init(&tsk->ptrace_list);
+				REMOVE_LINKS(tsk);
+				tsk->parent = tsk->real_parent;
+				SET_LINKS(tsk);
+			}
+			write_unlock_irq(&tasklist_lock);
+		}
+
+		send_sig(SIGKILL, tsk, 1);
+
+		spin_lock_irq(&tsk->sighand->siglock);
+		sigfillset(&tsk->blocked);
+		sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
+		set_tsk_thread_flag(tsk, TIF_SIGPENDING);
+		if (tsk->flags & PF_FROZEN)
+			tsk->flags &= ~PF_FROZEN;
+		spin_unlock_irq(&tsk->sighand->siglock);
+
+		wake_up_process(tsk);
+		put_task_struct(tsk);
+	}
+
+	yield();
+
+	if (root_task != NULL) {
+		send_sig(SIGKILL, root_task, 1);
+
+		spin_lock_irq(&root_task->sighand->siglock);
+		sigfillset(&root_task->blocked);
+		sigdelsetmask(&root_task->blocked, sigmask(SIGKILL));
+		set_tsk_thread_flag(root_task, TIF_SIGPENDING);
+		if (root_task->flags & PF_FROZEN)
+			root_task->flags &= ~PF_FROZEN;
+		spin_unlock_irq(&root_task->sighand->siglock);
+
+		wake_up_process(root_task);
+		put_task_struct(root_task);
+	}
+
+	cpt_finish_ubc(ctx);
+	cpt_object_destroy(ctx);
+
+	delay = 1;
+	while (atomic_read(&env->counter) != 1) {
+		if (signal_pending(current))
+			break;
+		current->state = TASK_INTERRUPTIBLE;
+		delay = (delay < HZ) ? (delay << 1) : HZ;
+		schedule_timeout(delay);
+	}
+	put_ve(env);
+
+	return err;
+}
+
+static void collect_task_ubc(task_t *t, struct cpt_context *ctx)
+{
+	struct task_beancounter *tbc;
+
+	tbc = task_bc(t);
+	cpt_add_ubc(tbc->exec_ub, ctx);
+	cpt_add_ubc(tbc->task_ub, ctx);
+	cpt_add_ubc(tbc->fork_sub, ctx);
+}
+
+static cpt_object_t * remember_task(task_t * child, cpt_object_t * head,
+				    cpt_context_t * ctx)
+{
+	cpt_object_t *cobj;
+
+	if (freezable(child) && !(child->flags&PF_FROZEN)) {
+		eprintk_ctx("process " CPT_FID " is not frozen\n", CPT_TID(child));
+		put_task_struct(child);
+		return NULL;
+	}
+
+	if (lookup_cpt_object(CPT_OBJ_TASK, child, ctx)) BUG();
+	if ((cobj = alloc_cpt_object(GFP_KERNEL, ctx)) == NULL) {
+		put_task_struct(child);
+		return NULL;
+	}
+	cobj->o_count = 1;
+	cpt_obj_setobj(cobj, child, ctx);
+	insert_cpt_object(CPT_OBJ_TASK, cobj, head, ctx);
+	collect_task_ubc(child, ctx);
+	return cobj;
+}
+
+static int vps_collect_tasks(struct cpt_context *ctx)
+{
+	int err = -ESRCH;
+	cpt_object_t *obj;
+	task_t *root;
+
+	read_lock(&tasklist_lock);
+	root = find_task_by_pid_ve(1);
+	if (root)
+		get_task_struct(root);
+	read_unlock(&tasklist_lock);
+
+	if (!root) {
+		err = -ESRCH;
+		eprintk_ctx("vps_collect_tasks: cannot find root\n");
+		goto out;
+	}
+
+	if ((obj = alloc_cpt_object(GFP_KERNEL, ctx)) == NULL) {
+		put_task_struct(root);
+		return -ENOMEM;
+	}
+	obj->o_count = 1;
+	cpt_obj_setobj(obj, root, ctx);
+	intern_cpt_object(CPT_OBJ_TASK, obj, ctx);
+	collect_task_ubc(root, ctx);
+
+	/* Collect process subtree recursively */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		cpt_object_t *head = obj;
+		task_t *tsk = obj->o_obj;
+		task_t *child;
+
+		if (freezable(tsk) && !(tsk->flags&PF_FROZEN)) {
+			eprintk_ctx("process " CPT_FID " is not frozen\n", CPT_TID(tsk));
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (tsk->state == TASK_RUNNING)
+			printk("BUG: Holy Crap 2 %ld " CPT_FID "\n", tsk->state, CPT_TID(tsk));
+
+		wait_task_inactive(tsk);
+
+		err = check_task_state(tsk, ctx);
+		if (err)
+			goto out;
+
+		if (tsk->pid == tsk->tgid) {
+			child = tsk;
+			for (;;) {
+				read_lock(&tasklist_lock);
+				child = next_thread(child);
+				if (child != tsk)
+					get_task_struct(child);
+				read_unlock(&tasklist_lock);
+
+				if (child == tsk)
+					break;
+
+				if (child->real_parent != tsk->real_parent) {
+					put_task_struct(child);
+					eprintk_ctx("illegal thread structure, kernel bug\n");
+					err = -EINVAL;
+					goto out;
+				}
+
+				if ((head = remember_task(child, head, ctx)) == NULL) {
+					eprintk_ctx("task obj allocation failure\n");
+					err = -ENOMEM;
+					goto out;
+				}
+			}
+		}
+
+		/* About locking. VE is frozen. But lists of children
+		 * may change at least for init, when entered task reparents
+		 * to init and when reparented task exits. If we take care
+		 * of this case, we still can unlock while scanning
+		 * tasklists.
+		 */
+		read_lock(&tasklist_lock);
+		list_for_each_entry(child, &tsk->children, sibling) {
+			if (child->real_parent != tsk)
+				continue;
+			if (child->pid != child->tgid)
+				continue;
+			get_task_struct(child);
+			read_unlock(&tasklist_lock);
+
+			if ((head = remember_task(child, head, ctx)) == NULL) {
+				eprintk_ctx("task obj allocation failure\n");
+				err = -ENOMEM;
+				goto out;
+			}
+
+			read_lock(&tasklist_lock);
+		}
+
+		list_for_each_entry(child, &tsk->ptrace_children, ptrace_list) {
+			if (child->real_parent != tsk)
+				continue;
+			if (child->pid != child->tgid)
+				continue;
+			get_task_struct(child);
+			read_unlock(&tasklist_lock);
+
+			if ((head = remember_task(child, head, ctx)) == NULL) {
+				eprintk_ctx("task obj allocation failure\n");
+				err = -ENOMEM;
+				goto out;
+			}
+
+			read_lock(&tasklist_lock);
+		}
+		read_unlock(&tasklist_lock);
+	}
+
+	return 0;
+
+out:
+	while (!list_empty(&ctx->object_array[CPT_OBJ_TASK])) {
+		struct list_head *head = ctx->object_array[CPT_OBJ_TASK].next;
+		cpt_object_t *obj = list_entry(head, cpt_object_t, o_list);
+		task_t *tsk;
+
+		list_del(head);
+		tsk = obj->o_obj;
+		put_task_struct(tsk);
+		free_cpt_object(obj, ctx);
+	}
+	return err;
+}
+
+static int cpt_collect(struct cpt_context *ctx)
+{
+	int err;
+
+	if ((err = cpt_collect_mm(ctx)) != 0)
+		return err;
+
+	if ((err = cpt_collect_sysv(ctx)) != 0)
+		return err;
+
+	if ((err = cpt_collect_files(ctx)) != 0)
+		return err;
+
+	if ((err = cpt_collect_fs(ctx)) != 0)
+		return err;
+
+	if ((err = cpt_collect_namespace(ctx)) != 0)
+		return err;
+
+	if ((err = cpt_collect_signals(ctx)) != 0)
+		return err;
+
+	if (virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_COLLECT, ctx) & NOTIFY_FAIL)
+		return -ECHRNG;
+
+	return 0;
+}
+
+static int cpt_dump_veinfo(cpt_context_t *ctx)
+{
+	struct cpt_veinfo_image i;
+	struct ve_struct *ve;
+	struct timespec delta;
+
+	cpt_open_section(ctx, CPT_SECT_VEINFO);
+	cpt_open_object(NULL, ctx);
+
+	i.cpt_next = CPT_NULL;
+	i.cpt_object = CPT_OBJ_VEINFO;
+	i.cpt_hdrlen = sizeof(i);
+	i.cpt_content = CPT_CONTENT_VOID;
+
+	ve = get_exec_env();
+	i.shm_ctl_all = ve->_shm_ctlall;
+	i.shm_ctl_max = ve->_shm_ctlmax;
+	i.shm_ctl_mni = ve->_shm_ctlmni;
+
+	i.msg_ctl_max = ve->_msg_ctlmax;
+	i.msg_ctl_mni = ve->_msg_ctlmni;
+	i.msg_ctl_mnb = ve->_msg_ctlmnb;
+
+	BUG_ON(sizeof(ve->_sem_ctls) != sizeof(i.sem_ctl_arr));
+	i.sem_ctl_arr[0] = ve->_sem_ctls[0];
+	i.sem_ctl_arr[1] = ve->_sem_ctls[1];
+	i.sem_ctl_arr[2] = ve->_sem_ctls[2];
+	i.sem_ctl_arr[3] = ve->_sem_ctls[3];
+
+	do_posix_clock_monotonic_gettime(&delta);
+	set_normalized_timespec(&delta,
+			delta.tv_sec - ve->start_timespec.tv_sec,
+			delta.tv_nsec - ve->start_timespec.tv_nsec);
+	i.start_timespec_delta = cpt_timespec_export(&delta);
+	i.start_jiffies_delta = get_jiffies_64() - ve->start_jiffies;
+
+	ctx->write(&i, sizeof(i), ctx);
+	cpt_close_object(ctx);
+	cpt_close_section(ctx);
+	return 0;
+}
+
+static int cpt_dump_utsname(cpt_context_t *ctx)
+{
+	int len;
+	struct cpt_object_hdr o;
+
+	cpt_open_section(ctx, CPT_SECT_UTSNAME);
+
+	cpt_open_object(NULL, ctx);
+	len = strlen(ve_utsname.nodename);
+	o.cpt_next = CPT_NULL;
+	o.cpt_object = CPT_OBJ_NAME;
+	o.cpt_hdrlen = sizeof(o);
+	o.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&o, sizeof(o), ctx);
+	ctx->write(ve_utsname.nodename, len+1, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+
+	cpt_open_object(NULL, ctx);
+	len = strlen(ve_utsname.domainname);
+	o.cpt_next = CPT_NULL;
+	o.cpt_object = CPT_OBJ_NAME;
+	o.cpt_hdrlen = sizeof(o);
+	o.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&o, sizeof(o), ctx);
+	ctx->write(ve_utsname.domainname, len+1, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+
+	cpt_close_section(ctx);
+	return 0;
+}
+
+static int cpt_dump_vsyscall(cpt_context_t *ctx)
+{
+	struct cpt_page_block *pgb = cpt_get_buf(ctx);
+
+	cpt_open_section(ctx, CPT_SECT_VSYSCALL);
+	cpt_open_object(NULL, ctx);
+
+	pgb->cpt_next = CPT_NULL;
+	pgb->cpt_object = CPT_OBJ_VSYSCALL;
+	pgb->cpt_hdrlen = sizeof(*pgb);
+	pgb->cpt_content = CPT_CONTENT_DATA;
+	pgb->cpt_start = cpt_ptr_export(vsyscall_addr);
+	pgb->cpt_end = pgb->cpt_start + PAGE_SIZE;
+
+	ctx->write(pgb, sizeof(*pgb), ctx);
+	cpt_release_buf(ctx);
+
+	ctx->write(vsyscall_addr, PAGE_SIZE, ctx);
+
+	cpt_close_object(ctx);
+	cpt_close_section(ctx);
+	return 0;
+}
+
+int cpt_dump(struct cpt_context *ctx)
+{
+	struct ve_struct *oldenv, *env;
+	int err, err2 = 0;
+
+	if (!ctx->ve_id)
+		return -EINVAL;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+
+	down_read(&env->op_sem);
+	err = -ESRCH;
+	if (!env->is_running)
+		goto out_noenv;
+	if (!env->is_locked)
+		goto out_noenv;
+
+	oldenv = set_exec_env(env);
+
+	/* Phase 2: real checkpointing */
+	err = cpt_open_dumpfile(ctx);
+	if (err)
+		goto out;
+	
+	cpt_major_hdr_out(ctx);
+
+	if (!err)
+		err = cpt_dump_veinfo(ctx);
+	if (!err)
+		err = cpt_dump_ubc(ctx);
+	if (!err)
+		err = cpt_dump_ifinfo(ctx);
+	if (!err)
+		err = cpt_dump_files(ctx);
+	if (!err)
+		err = cpt_dump_files_struct(ctx);
+	if (!err)
+		err = cpt_dump_fs_struct(ctx);
+	if (!err)
+		err = cpt_dump_namespace(ctx);
+	if (!err)
+		err = cpt_dump_sighand(ctx);
+	if (!err)
+		err = cpt_dump_vm(ctx);
+	if (!err)
+		err = cpt_dump_sysvsem(ctx);
+	if (!err)
+		err = cpt_dump_sysvmsg(ctx);
+	if (!err)
+		err = cpt_dump_tasks(ctx);
+	if (!err)
+		err = cpt_dump_orphaned_sockets(ctx);
+#if defined(CONFIG_VE_IPTABLES) && \
+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
+	if (!err)
+		err = cpt_dump_ip_conntrack(ctx);
+#endif
+	if (!err) {
+		if (virtinfo_notifier_call(VITYPE_SCP,
+					VIRTINFO_SCP_DUMP, ctx) & NOTIFY_FAIL)
+			err = -ECHRNG;
+	}
+	if (!err)
+		err = cpt_dump_utsname(ctx);
+
+	if (!err)
+		err = cpt_dump_vsyscall(ctx);
+
+	if (!err)
+		err = cpt_dump_tail(ctx);
+
+	err2 = cpt_close_dumpfile(ctx);
+
+out:
+	set_exec_env(oldenv);
+out_noenv:
+	up_read(&env->op_sem);
+	put_ve(env);
+	return err ? : err2;
+}
+
+int cpt_vps_suspend(struct cpt_context *ctx)
+{
+	struct ve_struct *oldenv, *env;
+	int err = 0;
+
+	ctx->kernel_config_flags = test_kernel_config();
+	cpt_object_init(ctx);
+
+	if (!ctx->ve_id) {
+		env = get_exec_env();
+		if (env == get_ve0())
+			return -EINVAL;
+		wprintk("undefined ve_id\n");
+		ctx->ve_id = env->veid;
+		get_ve(env);
+	} else {
+		env = get_ve_by_id(ctx->ve_id);
+		if (!env)
+			return -ESRCH;
+	}
+
+	ctx->iptables_mask = env->_iptables_modules;
+	ctx->features = env->features;
+
+	down_write(&env->op_sem);
+	err = -ESRCH;
+	if (!env->is_running)
+		goto out_noenv;
+
+	err = -EBUSY;
+	if (env->is_locked)
+		goto out_noenv;
+	env->is_locked = 1;
+	downgrade_write(&env->op_sem);
+
+	oldenv = set_exec_env(env);
+
+	/* Phase 0: find and stop all the tasks */
+	if ((err = vps_stop_tasks(ctx)) != 0)
+		goto out;
+
+	if ((err = cpt_suspend_network(ctx)) != 0)
+		goto out_wake;
+
+	/* At the moment all the state is frozen. We do not need to lock
+	 * the state, which can be changed only if the tasks are running.
+	 */
+
+	/* Phase 1: collect task tree */
+	if ((err = vps_collect_tasks(ctx)) != 0)
+		goto out_wake;
+
+	/* Phase 1': collect all the resources */
+	if ((err = cpt_collect(ctx)) != 0)
+		goto out;
+
+out:
+	set_exec_env(oldenv);
+	up_read(&env->op_sem);
+	put_ve(env);
+	return err;
+
+out_noenv:
+	up_write(&env->op_sem);
+	put_ve(env);
+	return err;
+
+out_wake:
+	read_lock(&tasklist_lock);
+	wake_ve(ctx);
+	read_unlock(&tasklist_lock);
+	goto out;
+}
+
+static void check_unsupported_netdevices(struct cpt_context *ctx, __u32 *caps)
+{
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev; dev = dev->next) {
+		if (dev != get_exec_env()->_loopback_dev &&
+#if defined(CONFIG_VE_ETHDEV) || defined(CONFIG_VE_ETHDEV_MODULE)
+		    !(KSYMREF(veth_open) && dev->open == KSYMREF(veth_open)) &&
+#endif
+		    dev != get_exec_env()->_venet_dev) {
+			eprintk_ctx("unsupported netdevice %s\n", dev->name);
+			*caps |= (1<<CPT_UNSUPPORTED_NETDEV);
+		}
+	}
+	read_unlock(&dev_base_lock);
+}
+
+static void check_one_process(struct cpt_context *ctx, __u32 *caps,
+		unsigned int flags, struct ve_struct *env,
+		task_t *root, task_t *p)
+{
+	if (p->used_math)
+		*caps |= flags;
+#ifdef CONFIG_X86_64
+	if (!(p->thread_info->flags & _TIF_IA32))
+		*caps |= ((1<<CPT_CPU_X86_EMT64)|(1<<CPT_CPU_X86_SYSCALL));
+	else if (p->mm && p->mm->context.vdso) {
+		if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+			*caps |= (1<<CPT_CPU_X86_SEP32);
+		else
+			*caps |= (1<<CPT_CPU_X86_SYSCALL32);
+	}
+#else
+	if (p->mm && p->mm->context.vdso)
+		*caps |= (1<<CPT_CPU_X86_SEP);
+#endif
+	if (vps_child_level(root, p) >= 0) {
+		if (!is_virtual_pid(virt_pid(p))) {
+			eprintk_ctx("external process %d/%d(%s) inside VE (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm);
+			*caps |= (1<<CPT_EXTERNAL_PROCESS);
+		}
+		if (!is_virtual_pid(virt_pgid(p))) {
+			eprintk_ctx("external process group %d/%d(%s) inside VE (e.g. vzctl enter or vzctl exec).\n", virt_pgid(p), p->pid, p->comm);
+			*caps |= (1<<CPT_EXTERNAL_PROCESS);
+		}
+		if (!is_virtual_pid(virt_sid(p))) {
+			eprintk_ctx("external process session %d/%d(%s) inside VE (e.g. vzctl enter or vzctl exec).\n", virt_sid(p), p->pid, p->comm);
+			*caps |= (1<<CPT_EXTERNAL_PROCESS);
+		}
+	} else {
+		eprintk_ctx("foreign process %d/%d(%s) inside VE (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm);
+		*caps |= (1<<CPT_EXTERNAL_PROCESS);
+	}
+	if (p->namespace && p->namespace != current->namespace) {
+		eprintk_ctx("namespaces are not supported: process %d/%d(%s)\n", virt_pid(p), p->pid, p->comm);
+		*caps |= (1<<CPT_NAMESPACES);
+	}
+	if (p->policy != SCHED_NORMAL) {
+		eprintk_ctx("scheduler policy is not supported %d/%d(%s)\n", virt_pid(p), p->pid, p->comm);
+		*caps |= (1<<CPT_SCHEDULER_POLICY);
+	}
+	if (p->parent) {
+		if (p->parent != p->real_parent &&
+				VE_TASK_INFO(p->parent)->owner_env != env) {
+			eprintk_ctx("task %d/%d(%s) is ptraced from VE0\n", p->pid, virt_pid(p), p->comm);
+			*caps |= (1<<CPT_PTRACED_FROM_VE0);
+		}
+	}
+}
+
+static void check_unsupported_mounts(struct cpt_context *ctx, __u32 *caps,
+		struct ve_struct *env, struct namespace *n, char *path_buf)
+{
+	struct list_head *p;
+	char *path;
+
+	down_read(&n->sem);
+	list_for_each(p, &n->list) {
+		struct vfsmount *mnt = list_entry(p, struct vfsmount, mnt_list);
+
+		spin_lock(&dcache_lock);
+		path = __d_path(mnt->mnt_root, mnt,
+				env->fs_root, env->fs_rootmnt,
+				path_buf, PAGE_SIZE);
+		spin_unlock(&dcache_lock);
+		if (IS_ERR(path))
+			continue;
+
+		if (check_one_vfsmount(mnt)) {
+			eprintk_ctx("Unsupported filesystem %s\n", mnt->mnt_sb->s_type->name);
+			*caps |= (1<<CPT_UNSUPPORTED_FSTYPE);
+		}
+	}
+	up_read(&n->sem);
+}
+
+int cpt_vps_caps(struct cpt_context *ctx, __u32 *caps)
+{
+	task_t *p;
+	task_t *root;
+	struct ve_struct *env;
+	struct ve_struct *old_env;
+	struct namespace *n;
+	int err;
+	unsigned int flags = test_cpu_caps_and_features();
+
+	if (!ctx->ve_id)
+		return -EINVAL;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (env == NULL)
+		return -ESRCH;
+
+	down_read(&env->op_sem);
+	err = -ESRCH;
+	if (!env->is_running) {
+		eprintk_ctx("CT is not running\n");
+		goto out_noenv;
+	}
+
+	err = -EBUSY;
+	if (env->is_locked) {
+		eprintk_ctx("CT is locked\n");
+		goto out_noenv;
+	}
+
+	*caps = flags & (1<<CPT_CPU_X86_CMOV);
+	if (flags & (1 << CPT_SLM_DMPRST)) {
+		eprintk_ctx("SLM is enabled, but slm_dmprst module is not loaded\n");
+		*caps |= (1 << CPT_SLM_DMPRST);
+	}
+
+	flags &= ~((1<<CPT_CPU_X86_EMT64)|(1<<CPT_CPU_X86_IA64));
+	flags &= ~((1<<CPT_CPU_X86_SYSCALL32)|(1<<CPT_CPU_X86_SEP32));
+	flags &= ~((1<<CPT_CPU_X86_SYSCALL)|(1<<CPT_CPU_X86_SEP));
+
+	old_env = set_exec_env(env);
+
+	check_unsupported_netdevices(ctx, caps);
+
+	read_lock(&tasklist_lock);
+	root = find_task_by_pid_ve(1);
+	if (!root) {
+		read_unlock(&tasklist_lock);
+		eprintk_ctx("cannot find ve init\n");
+		err = -ESRCH;
+		goto out;
+	}
+	get_task_struct(root);
+	for (p = __first_task_ve(env); p != NULL ; p = __next_task_ve(env, p))
+		check_one_process(ctx, caps, flags, env, root, p);
+	read_unlock(&tasklist_lock);
+
+	task_lock(root);
+	n = root->namespace;
+	if (n)
+		get_namespace(n);
+	task_unlock(root);
+
+	if (n) {
+		char *path_buf;
+
+		path_buf = (char *) __get_free_page(GFP_KERNEL);
+		if (!path_buf) {
+			put_namespace(n);
+			err = -ENOMEM;
+			goto out_root;
+		}
+
+		check_unsupported_mounts(ctx, caps, env, n, path_buf);
+
+		free_page((unsigned long) path_buf);
+		put_namespace(n);
+	}
+
+	err = 0;
+
+out_root:
+	put_task_struct(root);
+out:
+	set_exec_env(old_env);
+out_noenv:
+	up_read(&env->op_sem);
+	put_ve(env);
+
+	return err;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_dump.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_dump.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_dump.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_dump.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,14 @@
+int cpt_dump(struct cpt_context *cpt);
+int rst_undump(struct cpt_context *cpt);
+int cpt_suspend(struct cpt_context *cpt);
+int cpt_resume(struct cpt_context *cpt);
+int cpt_kill(struct cpt_context *cpt);
+int rst_clean(struct cpt_context *cpt);
+int rst_resume(struct cpt_context *cpt);
+int rst_kill(struct cpt_context *cpt);
+
+int cpt_freeze_one(pid_t pid, int freeze);
+int cpt_vps_suspend(struct cpt_context *ctx);
+int vps_rst_undump(struct cpt_context *ctx);
+
+int cpt_vps_caps(struct cpt_context *ctx, __u32 *caps);
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_epoll.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_epoll.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_epoll.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_epoll.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,105 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/eventpoll.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+extern struct file_operations eventpoll_fops;
+
+int cpt_dump_epolldev(cpt_object_t *obj, cpt_context_t *ctx)
+{
+	int err = 0;
+	struct file *file = obj->o_obj;
+	struct eventpoll *ep;
+	struct rb_node *rbp;
+	struct cpt_epoll_image ei;
+
+	if (file->f_op != &eventpoll_fops) {
+		eprintk_ctx("bad epoll file\n");
+		return -EINVAL;
+	}
+
+	ep = file->private_data;
+
+	/* eventpoll.c does not protect open /proc/N/fd, silly.
+	 * Opener will get an invalid file with uninitialized private_data
+	 */
+	if (unlikely(ep == NULL)) {
+		eprintk_ctx("bad epoll device\n");
+		return -EINVAL;
+	}
+
+	cpt_open_object(NULL, ctx);
+
+	ei.cpt_next = CPT_NULL;
+	ei.cpt_object = CPT_OBJ_EPOLL;
+	ei.cpt_hdrlen = sizeof(ei);
+	ei.cpt_content = CPT_CONTENT_ARRAY;
+	ei.cpt_file = obj->o_pos;
+
+	ctx->write(&ei, sizeof(ei), ctx);
+
+	down(&epsem);
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		loff_t saved_obj;
+		cpt_object_t *tobj;
+		struct cpt_epoll_file_image efi;
+		struct epitem *epi;
+		epi = rb_entry(rbp, struct epitem, rbn);
+		tobj = lookup_cpt_object(CPT_OBJ_FILE, epi->ffd.file, ctx);
+		if (tobj == NULL) {
+			eprintk_ctx("epoll device refers to an external file\n");
+			err = -EBUSY;
+			break;
+		}
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+
+		efi.cpt_next = CPT_NULL;
+		efi.cpt_object = CPT_OBJ_EPOLL_FILE;
+		efi.cpt_hdrlen = sizeof(efi);
+		efi.cpt_content = CPT_CONTENT_VOID;
+		efi.cpt_file = tobj->o_pos;
+		efi.cpt_fd = epi->ffd.fd;
+		efi.cpt_events = epi->event.events;
+		efi.cpt_data = epi->event.data;
+		efi.cpt_revents = epi->revents;
+		efi.cpt_ready = 0;
+		if (!list_empty(&epi->rdllink))
+			efi.cpt_ready = 1;
+
+		ctx->write(&efi, sizeof(efi), ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	up(&epsem);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
+
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_files.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_files.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_files.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_files.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,1429 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <ub/ub_mem.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+void cpt_printk_dentry(struct dentry *d, struct vfsmount *mnt)
+{
+	char *path;
+	unsigned long pg = __get_free_page(GFP_KERNEL);
+
+	if (!pg)
+		return;
+
+	path = d_path(d, mnt, (char *)pg, PAGE_SIZE);
+
+	if (!IS_ERR(path))
+		printk("<%s>", path);
+	free_page(pg);
+}
+
+int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
+			 cpt_context_t *ctx)
+{
+	if (path[0] == '/' && !(!IS_ROOT(d) && d_unhashed(d))) {
+		struct nameidata nd;
+		if (path_lookup(path, 0, &nd)) {
+			eprintk_ctx("d_path cannot be looked up %s\n", path);
+			return -EINVAL;
+		}
+		if (nd.dentry != d || nd.mnt != mnt) {
+			eprintk_ctx("d_path is invisible %s\n", path);
+			path_release(&nd);
+			return -EINVAL;
+		}
+		path_release(&nd);
+	}
+	return 0;
+}
+
+static int
+cpt_replaced(struct dentry * de, struct vfsmount *mnt, cpt_context_t * ctx)
+{
+	int result = 0;
+
+#if defined(CONFIG_VZFS_FS) || defined(CONFIG_VZFS_FS_MODULE)
+	char *path;
+	unsigned long pg;
+	struct dentry * renamed_dentry;
+
+	if (de->d_sb->s_magic != FSMAGIC_VEFS)
+		return 0;
+	if (de->d_inode->i_nlink != 0 ||
+	    atomic_read(&de->d_inode->i_writecount) > 0) 
+		return 0;
+
+	renamed_dentry = vefs_replaced_dentry(de);
+	if (renamed_dentry == NULL)
+		return 0;
+
+	pg = __get_free_page(GFP_KERNEL);
+	if (!pg)
+		return 0;
+
+	path = d_path(de, mnt, (char *)pg, PAGE_SIZE);
+	if (!IS_ERR(path)) {
+		int len;
+		struct nameidata nd;
+
+		len = pg + PAGE_SIZE - 1 - (unsigned long)path;
+		if (len >= sizeof("(deleted) ") - 1 &&
+		    !memcmp(path, "(deleted) ", sizeof("(deleted) ") - 1)) {
+			len -= sizeof("(deleted) ") - 1;
+			path += sizeof("(deleted) ") - 1;
+		}
+
+		if (path_lookup(path, 0, &nd) == 0) {
+			if (mnt == nd.mnt &&
+			    vefs_is_renamed_dentry(nd.dentry, renamed_dentry))
+				result = 1;
+			path_release(&nd);
+		}
+	}
+	free_page(pg);
+#endif
+	return result;
+}
+
+static int cpt_dump_dentry(struct dentry *d, struct vfsmount *mnt,
+			   int replaced, cpt_context_t *ctx)
+{
+	int len;
+	char *path;
+	char *pg = cpt_get_buf(ctx);
+	loff_t saved;
+
+	path = d_path(d, mnt, pg, PAGE_SIZE);
+	len = PTR_ERR(path);
+
+	if (IS_ERR(path)) {
+		struct cpt_object_hdr o;
+		char tmp[1];
+
+		/* VZ changes d_path() to return EINVAL, when path
+		 * is not supposed to be visible inside VE.
+		 * This changes behaviour of d_path() comparing
+		 * to mainstream kernel, f.e. d_path() fails
+		 * on any kind of shared memory. Maybe, there are
+		 * another cases, but I am aware only about this one.
+		 * So, we just ignore error on shmem mounts and proceed.
+		 * Otherwise, checkpointing is prohibited because
+		 * of reference to an invisible file.
+		 */
+		if (len != -EINVAL ||
+		    mnt != get_exec_env()->shmem_mnt)
+			eprintk_ctx("d_path err=%d\n", len);
+		else
+			len = 0;
+
+		cpt_push_object(&saved, ctx);
+		cpt_open_object(NULL, ctx);
+		o.cpt_next = CPT_NULL;
+		o.cpt_object = CPT_OBJ_NAME;
+		o.cpt_hdrlen = sizeof(o);
+		o.cpt_content = CPT_CONTENT_NAME;
+		tmp[0] = 0;
+
+		ctx->write(&o, sizeof(o), ctx);
+		ctx->write(tmp, 1, ctx);
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved, ctx);
+
+		__cpt_release_buf(ctx);
+		return len;
+	} else {
+		struct cpt_object_hdr o;
+
+		len = pg + PAGE_SIZE - 1 - path;
+		if (replaced &&
+		    len >= sizeof("(deleted) ") - 1 &&
+		    !memcmp(path, "(deleted) ", sizeof("(deleted) ") - 1)) {
+			len -= sizeof("(deleted) ") - 1;
+			path += sizeof("(deleted) ") - 1;
+		}
+		o.cpt_next = CPT_NULL;
+		o.cpt_object = CPT_OBJ_NAME;
+		o.cpt_hdrlen = sizeof(o);
+		o.cpt_content = CPT_CONTENT_NAME;
+		path[len] = 0;
+
+		if (cpt_verify_overmount(path, d, mnt, ctx)) {
+			__cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+
+		cpt_push_object(&saved, ctx);
+		cpt_open_object(NULL, ctx);
+		ctx->write(&o, sizeof(o), ctx);
+		ctx->write(path, len+1, ctx);
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved, ctx);
+		__cpt_release_buf(ctx);
+	}
+	return 0;
+}
+
+int cpt_dump_string(const char *s, struct cpt_context *ctx)
+{
+	int len;
+	struct cpt_object_hdr o;
+
+	cpt_open_object(NULL, ctx);
+	len = strlen(s);
+	o.cpt_next = CPT_NULL;
+	o.cpt_object = CPT_OBJ_NAME;
+	o.cpt_hdrlen = sizeof(o);
+	o.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&o, sizeof(o), ctx);
+	ctx->write(s, len+1, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+
+static int
+cpt_dump_filename(struct file *file, int replaced, cpt_context_t *ctx)
+{
+	return cpt_dump_dentry(file->f_dentry, file->f_vfsmnt, replaced, ctx);
+}
+
+int cpt_dump_inode(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_inode_image *v = cpt_get_buf(ctx);
+	struct kstat sbuf;
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_INODE;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	if ((err = vfs_getattr(mnt, d, &sbuf)) != 0) {
+		cpt_release_buf(ctx);
+		return err;
+	}
+
+	v->cpt_dev	= d->d_inode->i_sb->s_dev;
+	v->cpt_ino	= d->d_inode->i_ino;
+	v->cpt_mode	= sbuf.mode;
+	v->cpt_nlink	= sbuf.nlink;
+	v->cpt_uid	= sbuf.uid;
+	v->cpt_gid	= sbuf.gid;
+	v->cpt_rdev	= d->d_inode->i_rdev;
+	v->cpt_size	= sbuf.size;
+	v->cpt_atime	= cpt_timespec_export(&sbuf.atime);
+	v->cpt_mtime	= cpt_timespec_export(&sbuf.mtime);
+	v->cpt_ctime	= cpt_timespec_export(&sbuf.ctime);
+	v->cpt_blksize	= sbuf.blksize;
+	v->cpt_blocks	= sbuf.blocks;
+	v->cpt_sb	= d->d_inode->i_sb->s_magic;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+int cpt_collect_files(cpt_context_t * ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	int index = 0;
+
+	/* Collect process fd sets */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		if (tsk->files && cpt_object_add(CPT_OBJ_FILES, tsk->files, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	/* Collect files from fd sets */
+	for_each_object(obj, CPT_OBJ_FILES) {
+		int fd;
+		struct files_struct *f = obj->o_obj;
+
+		cpt_obj_setindex(obj, index++, ctx);
+
+		if (obj->o_count != atomic_read(&f->count)) {
+			eprintk_ctx("files_struct is referenced outside %d %d\n", obj->o_count, atomic_read(&f->count));
+			return -EBUSY;
+		}
+
+		for (fd = 0; fd < f->max_fds; fd++) {
+			struct file *file = fcheck_files(f, fd);
+			if (file && cpt_object_add(CPT_OBJ_FILE, file, ctx) == NULL)
+				return -ENOMEM;
+		}
+	}
+
+	/* Collect files queued by AF_UNIX sockets. */
+	if ((err = cpt_collect_passedfds(ctx)) < 0)
+		return err;
+
+	/* OK. At this point we should count all the references. */
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		struct file *parent;
+		cpt_object_t *ino_obj;
+
+		if (obj->o_count != atomic_read(&file->f_count)) {
+			eprintk_ctx("file struct is referenced outside %d %d\n", obj->o_count, atomic_read(&file->f_count));
+			cpt_printk_dentry(file->f_dentry, file->f_vfsmnt);
+			return -EBUSY;
+		}
+
+		switch (file->f_dentry->d_inode->i_sb->s_magic) {
+		case FSMAGIC_FUTEX:
+		case FSMAGIC_MQUEUE:
+		case FSMAGIC_BDEV:
+			eprintk_ctx("file on unsupported FS: magic %08lx\n", file->f_dentry->d_inode->i_sb->s_magic);
+			return -EBUSY;
+		}
+
+		/* Collect inode. It is necessary mostly to resolve deleted
+		 * hard links. */
+		ino_obj = cpt_object_add(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
+		if (ino_obj == NULL)
+			return -ENOMEM;
+
+		parent = ino_obj->o_parent;
+		if (!parent || (!IS_ROOT(parent->f_dentry) && d_unhashed(parent->f_dentry)))
+			ino_obj->o_parent = file;
+
+		if (S_ISCHR(file->f_dentry->d_inode->i_mode)) {
+			int maj = imajor(file->f_dentry->d_inode);
+			if (maj == PTY_MASTER_MAJOR ||
+			    (maj >= UNIX98_PTY_MASTER_MAJOR &&
+			     maj < UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) ||
+			    maj == PTY_SLAVE_MAJOR ||
+			    maj == UNIX98_PTY_SLAVE_MAJOR ||
+			    maj == TTYAUX_MAJOR) {
+				err = cpt_collect_tty(file, ctx);
+				if (err)
+					return err;
+			}
+		}
+
+		if (S_ISSOCK(file->f_dentry->d_inode->i_mode)) {
+			err = cpt_collect_socket(file, ctx);
+			if (err)
+				return err;
+		}
+	}
+
+	err = cpt_index_sockets(ctx);
+
+	return err;
+}
+
+/* /dev/ptmx is special, all the files share one inode, but real tty backend
+ * is attached via file->private_data.
+ */
+
+static inline int is_cloning_inode(struct inode *ino)
+{
+	return S_ISCHR(ino->i_mode) && 
+		ino->i_rdev == MKDEV(TTYAUX_MAJOR,2);
+}
+
+static int dump_one_flock(struct file_lock *fl, int owner, struct cpt_context *ctx)
+{
+	pid_t pid;
+	struct cpt_flock_image *v = cpt_get_buf(ctx);
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_FLOCK;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	v->cpt_owner = owner;
+
+	pid = fl->fl_pid;
+	if (pid && !is_virtual_pid(fl->fl_pid)) {
+		pid = _pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
+		if (pid == -1) {
+			if (!(fl->fl_flags&FL_FLOCK)) {
+				eprintk_ctx("posix lock from another VE?\n");
+				cpt_release_buf(ctx);
+				return -EBUSY;
+			}
+			pid = 0;
+		}
+	}
+
+	v->cpt_pid = pid;
+	v->cpt_start = fl->fl_start;
+	v->cpt_end = fl->fl_end;
+	v->cpt_flags = fl->fl_flags;
+	v->cpt_type = fl->fl_type;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+
+int cpt_dump_flock(struct file *file, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct file_lock *fl;
+
+	lock_kernel();
+	for (fl = file->f_dentry->d_inode->i_flock;
+	     fl; fl = fl->fl_next) {
+		if (file != fl->fl_file)
+			continue;
+		if (fl->fl_flags & FL_LEASE) {
+			eprintk_ctx("lease lock is not supported\n");
+			err = -EINVAL;
+			break;
+		}
+		if (fl->fl_flags & FL_POSIX) {
+			cpt_object_t *obj;
+			obj = lookup_cpt_object(CPT_OBJ_FILES, fl->fl_owner, ctx);
+			if (obj) {
+				dump_one_flock(fl, obj->o_index, ctx);
+				continue;
+			} else {
+				eprintk_ctx("unknown lock owner %p\n", fl->fl_owner);
+				err = -EINVAL;
+			}
+		}
+		if (fl->fl_flags & FL_FLOCK) {
+			dump_one_flock(fl, -1, ctx);
+			continue;
+		}
+	}
+	unlock_kernel();
+	return err;
+}
+
+static int __comb_pid_to_vpid(int pid)	/* map pid (>0) or -pgid (<0) to its VE-visible id; 0 if it is gone */
+{
+	int vpid = pid;
+
+	if (pid > 0) {
+		vpid = _pid_type_to_vpid(PIDTYPE_PID, pid);
+		if (unlikely(vpid < 0)) {
+			dprintk("pid %d does not exist anymore.\n", pid);
+			return 0;
+		}
+	} else if (pid < 0) {
+		vpid = _pid_type_to_vpid(PIDTYPE_PGID, -pid);
+		if (unlikely(vpid < 0)) {
+			dprintk("pgid %d does not exist anymore.\n", -pid);
+			return 0;
+		}
+		vpid = -vpid;
+	}
+	return vpid;
+}
+
+static int dump_one_file(cpt_object_t *obj, struct file *file, cpt_context_t *ctx)
+{
+	int err = 0;
+	cpt_object_t *iobj;
+	struct cpt_file_image *v = cpt_get_buf(ctx);
+	struct kstat sbuf;
+	int replaced = 0;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FILE;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_flags = file->f_flags;
+	v->cpt_mode = file->f_mode;
+	v->cpt_pos = file->f_pos;
+	v->cpt_uid = file->f_uid;
+	v->cpt_gid = file->f_gid;
+
+	vfs_getattr(file->f_vfsmnt, file->f_dentry, &sbuf);
+
+	v->cpt_i_mode = sbuf.mode;
+	v->cpt_lflags = 0;
+	if (IS_ROOT(file->f_dentry))
+		v->cpt_lflags |= CPT_DENTRY_ROOT;
+	else if (d_unhashed(file->f_dentry)) {
+		if (cpt_replaced(file->f_dentry, file->f_vfsmnt, ctx)) {
+			v->cpt_lflags |= CPT_DENTRY_REPLACED;
+			replaced = 1;
+		} else {
+			v->cpt_lflags |= CPT_DENTRY_DELETED;
+		}
+	}
+	if (is_cloning_inode(file->f_dentry->d_inode))
+		v->cpt_lflags |= CPT_DENTRY_CLONING;
+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_PROC)
+		v->cpt_lflags |= CPT_DENTRY_PROC;
+	v->cpt_inode = CPT_NULL;
+	if (!(v->cpt_lflags & CPT_DENTRY_REPLACED)) {
+		iobj = lookup_cpt_object(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
+		if (iobj)
+			v->cpt_inode = iobj->o_pos;
+	}
+	v->cpt_priv = CPT_NULL;
+	v->cpt_fown_fd = -1;
+	if (S_ISCHR(v->cpt_i_mode)) {
+		iobj = lookup_cpt_object(CPT_OBJ_TTY, file->private_data, ctx);
+		if (iobj) {
+			v->cpt_priv = iobj->o_pos;
+			if (file->f_flags&FASYNC)
+				v->cpt_fown_fd = cpt_tty_fasync(file, ctx);
+		}
+	}
+	if (S_ISSOCK(v->cpt_i_mode)) {
+		if (obj->o_index < 0) {
+			eprintk_ctx("BUG: no socket index\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_priv = obj->o_index;
+		if (file->f_flags&FASYNC)
+			v->cpt_fown_fd = cpt_socket_fasync(file, ctx);
+	}
+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) {
+		v->cpt_priv = file->f_dentry->d_inode->i_ino;
+		v->cpt_lflags |= CPT_DENTRY_EPOLL;
+	}
+
+	v->cpt_fown_pid = __comb_pid_to_vpid((int)file->f_owner.pid);
+	v->cpt_fown_uid = file->f_owner.uid;
+	v->cpt_fown_euid = file->f_owner.euid;
+	v->cpt_fown_signo = file->f_owner.signum;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	if (!S_ISSOCK(v->cpt_i_mode)) {
+		err = cpt_dump_filename(file, replaced, ctx);
+		if (err)
+			return err;
+		if ((file->f_mode & FMODE_WRITE) &&
+		    file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_VEFS)
+			vefs_track_notify(file->f_dentry, 1);
+	}
+
+	if (file->f_dentry->d_inode->i_flock)
+		err = cpt_dump_flock(file, ctx);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
+
+/* About this weird function... Crappy code dealing with SYSV shared memory 
+ * defines TMPFS inode and file with f_op doing only mmap. So...
+ * Maybe, this is wrong and leaks something. It is clear that access to
+ * SYSV shmem via mmap is quite unusual and impossible from user space.
+ */
+static int dump_content_shm(struct file *file, struct cpt_context *ctx)
+{
+	struct cpt_obj_bits *v;
+	loff_t saved_pos;
+	unsigned long addr;
+
+	addr = do_mmap_pgoff(file, 0, file->f_dentry->d_inode->i_size,
+			     PROT_READ, MAP_SHARED, 0);
+	if (IS_ERR((void*)addr))
+		return PTR_ERR((void*)addr);
+
+	cpt_push_object(&saved_pos, ctx);
+	cpt_open_object(NULL, ctx);
+	v = cpt_get_buf(ctx);
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_BITS;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_DATA;
+	v->cpt_size = file->f_dentry->d_inode->i_size;
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	ctx->write((void*)addr, file->f_dentry->d_inode->i_size, ctx);
+	ctx->align(ctx);
+	do_munmap(current->mm, addr, file->f_dentry->d_inode->i_size);
+
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_pos, ctx);
+	return 0;
+}
+
+static int data_is_zero(char *addr, int len)
+{
+	int i;
+	unsigned long zerolong = 0;
+
+	for (i=0; i<len/sizeof(unsigned long); i++) {
+		if (((unsigned long*)(addr))[i] != 0)
+			return 0;
+	}
+	i = len % sizeof(unsigned long);
+	if (!i)
+		return 1;
+	return memcmp(addr + len - i, &zerolong, i) == 0;
+}
+
+
+static int dump_content_regular(struct file *file, struct cpt_context *ctx)
+{
+	loff_t saved_pos;
+	loff_t pos = 0;
+	loff_t obj_opened = CPT_NULL;
+	struct cpt_page_block pgb;
+	ssize_t (*do_read)(struct file *, char __user *, size_t, loff_t *);
+
+	if (file->f_op == NULL)
+		return -EINVAL;
+
+	if ((do_read = file->f_op->read) == NULL) {
+		if (file->f_op->mmap == NULL)
+			return -EINVAL;
+		if (file->f_dentry->d_inode->i_sb->s_magic != FSMAGIC_TMPFS) {
+			eprintk_ctx("unreadable, but not SYSV SHM file\n");
+			return -EINVAL;
+		}
+		
+		do_read = file->f_dentry->d_inode->i_fop->read;
+		cpt_dump_content_sysvshm(file, ctx);
+		if (!do_read) {
+			wprintk_ctx("TMPFS is not configured?\n");
+			return dump_content_shm(file, ctx);
+		}
+	}
+
+	if (!(file->f_mode & FMODE_READ)) {
+		struct file *filp;
+		filp = dentry_open(dget(file->f_dentry),
+				   mntget(file->f_vfsmnt),
+				   O_RDONLY | O_LARGEFILE);
+		if (IS_ERR(filp)) {
+			cpt_printk_dentry(file->f_dentry, file->f_vfsmnt);
+			eprintk_ctx("cannot reopen file for read %ld\n", PTR_ERR(filp));
+			return PTR_ERR(filp);
+		}
+		file = filp;
+	} else {
+		atomic_inc(&file->f_count);
+	}
+
+	for (;;) {
+		mm_segment_t oldfs;
+		int err;
+
+		(void)cpt_get_buf(ctx);
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = do_read(file, ctx->tmpbuf, PAGE_SIZE, &pos);
+		set_fs(oldfs);
+		if (err < 0) {
+			eprintk_ctx("dump_content_regular: do_read: %d", err);
+			fput(file);
+			__cpt_release_buf(ctx);
+			return err;
+		}
+		if (err == 0) {
+			__cpt_release_buf(ctx);
+			break;
+		}
+		if (data_is_zero(ctx->tmpbuf, err)) {
+			if (obj_opened != CPT_NULL) {
+				ctx->pwrite(&pgb.cpt_end, 8, ctx, obj_opened + offsetof(struct cpt_page_block, cpt_end));
+				ctx->align(ctx);
+				cpt_close_object(ctx);
+				cpt_pop_object(&saved_pos, ctx);
+				obj_opened = CPT_NULL;
+			}
+		} else {
+			if (obj_opened == CPT_NULL) {
+				cpt_push_object(&saved_pos, ctx);
+				cpt_open_object(NULL, ctx);
+				obj_opened = ctx->file->f_pos;
+				pgb.cpt_next = CPT_NULL;
+				pgb.cpt_object = CPT_OBJ_PAGES;
+				pgb.cpt_hdrlen = sizeof(pgb);
+				pgb.cpt_content = CPT_CONTENT_DATA;
+				pgb.cpt_start = pos - err;
+				pgb.cpt_end = pgb.cpt_start;
+				ctx->write(&pgb, sizeof(pgb), ctx);
+			}
+			ctx->write(ctx->tmpbuf, err, ctx);
+			pgb.cpt_end += err;
+		}
+		__cpt_release_buf(ctx);
+	}
+
+	fput(file);
+
+	if (obj_opened != CPT_NULL) {
+		ctx->pwrite(&pgb.cpt_end, 8, ctx, obj_opened + offsetof(struct cpt_page_block, cpt_end));
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_pos, ctx);
+		obj_opened = CPT_NULL;
+	}
+	return 0;
+}
+
+
+static int dump_content_chrdev(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	int maj;
+
+	maj = imajor(ino);
+	if (maj == MEM_MAJOR) {
+		/* Well, OK. */
+		return 0;
+	}
+	if (maj == PTY_MASTER_MAJOR ||
+	    (maj >= UNIX98_PTY_MASTER_MAJOR &&
+	     maj < UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) ||
+	    maj == PTY_SLAVE_MAJOR ||
+	    maj == UNIX98_PTY_SLAVE_MAJOR ||
+	    maj == TTYAUX_MAJOR) {
+		return cpt_dump_content_tty(file, ctx);
+	}
+	eprintk_ctx("unsupported chrdev %d/%d\n", maj, iminor(ino));
+	return -EINVAL;
+}
+
+static int dump_content_blkdev(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+
+	/* We are not going to transfer them. */
+	eprintk_ctx("unsupported blkdev %d/%d\n", imajor(ino), iminor(ino));
+	return -EINVAL;
+}
+
+static int dump_content_fifo(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	cpt_object_t *obj;
+	loff_t saved_pos;
+	int readers;
+	int writers;
+	int anon = 0;
+
+	down(PIPE_SEM(*ino));
+	readers = PIPE_READERS(*ino);
+	writers = PIPE_WRITERS(*ino);
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file1 = obj->o_obj;
+		if (file1->f_dentry->d_inode == ino) {
+			if (file1->f_mode & FMODE_READ)
+				readers--;
+			if (file1->f_mode & FMODE_WRITE)
+				writers--;
+		}
+	}	
+	up(PIPE_SEM(*ino));
+	if (readers || writers) {
+		struct dentry *dr = file->f_dentry->d_sb->s_root;
+		if (dr->d_name.len == 7 && memcmp(dr->d_name.name,"pipefs:",7) == 0)
+			anon = 1;
+
+		if (anon) {
+			eprintk_ctx("pipe has %d/%d external readers/writers\n", readers, writers);
+			return -EBUSY;
+		}
+		/* If fifo has external readers/writers, we are in trouble.
+		 * If the buffer is not empty, we must move its content.
+		 * But if the fifo is owned by a service, we cannot do
+		 * this. See?
+		 *
+		 * For now we assume, that if fifo is opened by another
+		 * process, we do not own it and, hence, migrate without
+		 * data.
+		 */
+		return 0;
+	}
+
+	/* OK, we must save fifo state. No semaphores required. */
+
+	if (PIPE_LEN(*ino)) {
+		struct cpt_obj_bits *v = cpt_get_buf(ctx);
+		cpt_push_object(&saved_pos, ctx);
+		cpt_open_object(NULL, ctx);
+		v->cpt_next = CPT_NULL;
+		v->cpt_object = CPT_OBJ_BITS;
+		v->cpt_hdrlen = sizeof(*v);
+		v->cpt_content = CPT_CONTENT_DATA;
+		v->cpt_size = PIPE_LEN(*ino);
+		ctx->write(v, sizeof(*v), ctx);
+		cpt_release_buf(ctx);
+
+		ctx->write(PIPE_BASE(*ino) + PIPE_START(*ino), PIPE_MAX_RCHUNK(*ino), ctx);
+		if (PIPE_LEN(*ino) > PIPE_MAX_RCHUNK(*ino))
+			ctx->write(PIPE_BASE(*ino), PIPE_LEN(*ino)-PIPE_MAX_RCHUNK(*ino), ctx);
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_pos, ctx);
+	}
+
+	return 0;
+}
+
+static int dump_content_socket(struct file *file, struct cpt_context *ctx)
+{
+	return 0;
+}
+
+static int dump_one_inode(struct file *file, struct dentry *d,
+			  struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct inode *ino = d->d_inode;
+	cpt_object_t *iobj;
+	int dump_it = 0;
+
+	iobj = lookup_cpt_object(CPT_OBJ_INODE, ino, ctx);
+	if (!iobj)
+		return -EINVAL;
+
+	if (iobj->o_pos >= 0)
+		return 0;
+
+	if ((!IS_ROOT(d) && d_unhashed(d)) &&
+	    !cpt_replaced(d, mnt, ctx))
+		dump_it = 1;
+	if (!S_ISREG(ino->i_mode) && !S_ISDIR(ino->i_mode)) {
+		/* One more bug in epoll: invalid inode mode.
+		 * What a load of crap...
+		 */
+		if (ino->i_sb->s_magic == FSMAGIC_EPOLL &&
+		    (ino->i_mode & S_IFMT) == 0)
+			return 0;
+		dump_it = 1;
+	}
+
+	if (!dump_it)
+		return 0;
+
+	cpt_open_object(iobj, ctx);
+	cpt_dump_inode(d, mnt, ctx);
+
+	if (!IS_ROOT(d) && d_unhashed(d)) {
+		struct file *parent;
+		parent = iobj->o_parent;
+		if (!parent ||
+		    (!IS_ROOT(parent->f_dentry) && d_unhashed(parent->f_dentry))) {
+			/* Inode is not deleted, but it does not
+			 * have references from inside checkpointed
+			 * process group. We have options:
+			 * A. Fail, abort checkpointing
+			 * B. Proceed. File will be cloned.
+			 * A is correct, B is more complicated */
+			/* Just as a hint where to create deleted file */
+			if (ino->i_nlink != 0) {
+				eprintk_ctx("deleted reference to existing inode, checkpointing is impossible\n");
+				return -EBUSY;
+			}
+		} else {
+			/* Refer to _another_ file name. */
+			err = cpt_dump_filename(parent, 0, ctx);
+			if (err)
+				return err;
+			if (S_ISREG(ino->i_mode) || S_ISDIR(ino->i_mode))
+				dump_it = 0;
+		}
+	}
+	if (dump_it) {
+		if (S_ISREG(ino->i_mode)) {
+			if ((err = dump_content_regular(file, ctx)) != 0) {
+				eprintk_ctx("dump_content_regular ");
+				cpt_printk_dentry(d, mnt);
+			}
+		} else if (S_ISDIR(ino->i_mode)) {
+			/* We cannot do anything. The directory should be
+			 * empty, so it is not a big deal.
+			 */
+		} else if (S_ISCHR(ino->i_mode)) {
+			err = dump_content_chrdev(file, ctx);
+		} else if (S_ISBLK(ino->i_mode)) {
+			err = dump_content_blkdev(file, ctx);
+		} else if (S_ISFIFO(ino->i_mode)) {
+			err = dump_content_fifo(file, ctx);
+		} else if (S_ISSOCK(ino->i_mode)) {
+			err = dump_content_socket(file, ctx);
+		} else {
+			eprintk_ctx("unknown inode mode %o\n", ino->i_mode & S_IFMT);
+			err = -EINVAL;
+		}
+	}
+	cpt_close_object(ctx);
+
+	return err;
+}
+
+int cpt_dump_files(struct cpt_context *ctx)
+{
+	int epoll_nr;
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_TTY);
+	for_each_object(obj, CPT_OBJ_TTY) {
+		int err;
+
+		if ((err = cpt_dump_tty(obj, ctx)) != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	cpt_open_section(ctx, CPT_SECT_INODE);
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		int err;
+
+		if ((err = dump_one_inode(file, file->f_dentry,
+					  file->f_vfsmnt, ctx)) != 0)
+			return err;
+	}
+	for_each_object(obj, CPT_OBJ_FS) {
+		struct fs_struct *fs = obj->o_obj;
+		int err;
+
+		if (fs->root &&
+		    (err = dump_one_inode(NULL, fs->root, fs->rootmnt, ctx)) != 0)
+			return err;
+		if (fs->pwd &&
+		    (err = dump_one_inode(NULL, fs->pwd, fs->pwdmnt, ctx)) != 0)
+			return err;
+		if (fs->altroot &&
+		    (err = dump_one_inode(NULL, fs->altroot, fs->altrootmnt, ctx)) != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	epoll_nr = 0;
+	cpt_open_section(ctx, CPT_SECT_FILES);
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		int err;
+
+		if ((err = dump_one_file(obj, file, ctx)) != 0)
+			return err;
+		if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL)
+			epoll_nr++;
+	}
+	cpt_close_section(ctx);
+
+	if (epoll_nr) {
+		cpt_open_section(ctx, CPT_SECT_EPOLL);
+		for_each_object(obj, CPT_OBJ_FILE) {
+			struct file *file = obj->o_obj;
+			if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) {
+				int err;
+				if ((err = cpt_dump_epolldev(obj, ctx)) != 0)
+					return err;
+			}
+		}
+		cpt_close_section(ctx);
+	}
+
+	cpt_open_section(ctx, CPT_SECT_SOCKET);
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		int err;
+
+		if ((err = cpt_dump_socket(obj, obj->o_obj, obj->o_index, -1, ctx)) != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	return 0;
+}
+
+static int dump_filedesc(int fd, struct file *file,
+			 struct files_struct *f, struct cpt_context *ctx)
+{
+	struct cpt_fd_image *v = cpt_get_buf(ctx);
+	cpt_object_t *obj;
+
+	cpt_open_object(NULL, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FILEDESC;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	v->cpt_fd = fd;
+	obj = lookup_cpt_object(CPT_OBJ_FILE, file, ctx);
+	if (!obj) BUG();
+	v->cpt_file = obj->o_pos;
+	v->cpt_flags = 0;
+	if (FD_ISSET(fd, f->close_on_exec))
+		v->cpt_flags = CPT_FD_FLAG_CLOSEEXEC;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+static int dump_one_file_struct(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct files_struct *f = obj->o_obj;
+	struct cpt_files_struct_image *v = cpt_get_buf(ctx);
+	int fd;
+	loff_t saved_obj;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FILES;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_index = obj->o_index;
+	v->cpt_max_fds = f->max_fds;
+	v->cpt_next_fd = f->next_fd;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	for (fd = 0; fd < f->max_fds; fd++) {
+		struct file *file = fcheck_files(f, fd);
+		if (file)
+			dump_filedesc(fd, file, f, ctx);
+	}
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+int cpt_dump_files_struct(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_FILES_STRUCT);
+
+	for_each_object(obj, CPT_OBJ_FILES) {
+		int err;
+
+		if ((err = dump_one_file_struct(obj, ctx)) != 0)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
+
+int cpt_collect_fs(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		if (tsk->fs) {
+			if (cpt_object_add(CPT_OBJ_FS, tsk->fs, ctx) == NULL)
+				return -ENOMEM;
+			if (tsk->fs->pwd &&
+			    cpt_object_add(CPT_OBJ_INODE, tsk->fs->pwd->d_inode, ctx) == NULL)
+				return -ENOMEM;
+			if (tsk->fs->root &&
+			    cpt_object_add(CPT_OBJ_INODE, tsk->fs->root->d_inode, ctx) == NULL)
+				return -ENOMEM;
+			if (tsk->fs->altroot &&
+			    cpt_object_add(CPT_OBJ_INODE, tsk->fs->altroot->d_inode, ctx) == NULL)
+				return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+static int cpt_dump_dir(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	struct file file;
+
+	memset(&file, 0, sizeof(file));
+
+	file.f_dentry = d;
+	file.f_vfsmnt = mnt;
+	file.f_mode = FMODE_READ|FMODE_PREAD|FMODE_LSEEK;
+	return dump_one_file(NULL, &file, ctx);
+}
+
+static int dump_one_fs(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct fs_struct *fs = obj->o_obj;
+	struct cpt_fs_struct_image *v = cpt_get_buf(ctx);
+	loff_t saved_obj;
+	int err;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FS;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_umask = fs->umask;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	err = cpt_dump_dir(fs->root, fs->rootmnt, ctx);
+	if (!err)
+		err = cpt_dump_dir(fs->pwd, fs->pwdmnt, ctx);
+	if (!err && fs->altroot)
+		err = cpt_dump_dir(fs->altroot, fs->altrootmnt, ctx);
+
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
+
+int cpt_dump_fs_struct(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_FS);
+
+	for_each_object(obj, CPT_OBJ_FS) {
+		int err;
+
+		if ((err = dump_one_fs(obj, ctx)) != 0)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
+
+static int check_one_namespace(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct namespace *n = obj->o_obj;
+	struct list_head *p;
+	char *path_buf, *path;
+
+	path_buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!path_buf)
+		return -ENOMEM;
+
+	down_read(&n->sem);
+	list_for_each(p, &n->list) {
+		struct vfsmount *mnt = list_entry(p, struct vfsmount, mnt_list);
+
+		path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
+		if (IS_ERR(path))
+			continue;
+
+		if (check_one_vfsmount(mnt)) {
+			eprintk_ctx("unsupported fs type %s\n", mnt->mnt_sb->s_type->name);
+			err = -EINVAL;
+			break;
+		}
+	}
+	up_read(&n->sem);
+
+	free_page((unsigned long) path_buf);
+
+	return err;
+}
+
+int cpt_collect_namespace(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		if (tsk->namespace && cpt_object_add(CPT_OBJ_NAMESPACE, tsk->namespace, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	for_each_object(obj, CPT_OBJ_NAMESPACE) {
+		int err;
+		if ((err = check_one_namespace(obj, ctx)) != 0)
+			return err;
+	}
+
+	return 0;
+}
+
+struct args_t
+{
+	int* pfd;
+	char* path;
+};
+
+static int dumptmpfs(void *arg)
+{
+	int i;
+	struct args_t *args = arg;
+	int *pfd = args->pfd;
+	char *path = args->path;
+	char *argv[] = { "tar", "-c", "-S", "--numeric-owner", path, NULL };
+
+	i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
+	if (i < 0) {
+		eprintk("cannot enter ve to dump tmpfs\n");
+		module_put(THIS_MODULE);
+		return 1;
+	}
+
+	if (pfd[1] != 1)
+		sc_dup2(pfd[1], 1);
+
+	for (i=0; i<current->files->max_fds; i++) {
+		if (i != 1)
+			sc_close(i);
+	}
+
+	module_put(THIS_MODULE);
+
+	set_fs(KERNEL_DS);
+	i = sc_execve("/bin/tar", argv, NULL);
+	eprintk("failed to exec /bin/tar: %d\n", i);
+	return -1;
+}
+
+static int cpt_dump_tmpfs(char *path, struct cpt_context *ctx)
+{
+	int err;
+	int pid;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	char buf[16];
+	int n;
+	loff_t saved_obj;
+	struct args_t args;
+	
+	err = sc_pipe(pfd);
+	if (err < 0)
+		return err;
+	args.pfd = pfd;
+	args.path = path;
+	err = pid = local_kernel_thread(dumptmpfs, (void*)&args, SIGCHLD, 0);
+	if (err < 0)
+		goto out;
+	f = fget(pfd[0]);
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	cpt_push_object(&saved_obj, ctx);
+	cpt_open_object(NULL, ctx);
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NAME;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	do {
+		mm_segment_t oldfs;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		n = f->f_op->read(f, buf, sizeof(buf), &f->f_pos);
+		set_fs(oldfs);
+		if (n > 0)
+			ctx->write(buf, n, ctx);
+	} while (n > 0);
+
+	fput(f);
+
+	if ((err = sc_waitx(pid, 0)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+
+	buf[0] = 0;
+	ctx->write(buf, 1, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	return n;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	return err;
+}
+
+static int loopy_root(struct vfsmount *mnt)
+{
+	struct list_head *p;
+
+	list_for_each(p, &mnt->mnt_namespace->list) {
+		struct vfsmount * m = list_entry(p, struct vfsmount, mnt_list);
+		if (m == mnt)
+			return 0;
+		if (m->mnt_sb == mnt->mnt_sb)
+			return 1;
+	}
+	/* Cannot happen */
+	return 0;
+}
+
+static int cpt_dump_bind_mnt(struct vfsmount * mnt, cpt_context_t * ctx)
+{
+	struct list_head *p;
+	int err = -EINVAL;
+
+	/* One special case: mount --bind /a /a */
+	if (mnt->mnt_root == mnt->mnt_mountpoint)
+		return cpt_dump_dentry(mnt->mnt_root, mnt, 0, ctx);
+
+	list_for_each_prev(p, &mnt->mnt_list) {
+		struct vfsmount * m;
+
+		if (p == &mnt->mnt_namespace->list)
+			break;
+
+		m = list_entry(p, struct vfsmount, mnt_list);
+
+		if (m->mnt_sb != mnt->mnt_sb)
+			continue;
+
+		err = cpt_dump_dentry(mnt->mnt_root, m, 0, ctx);
+		if (err == 0)
+			break;
+	}
+	return err;
+}
+
+static int dump_vfsmount(struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct cpt_vfsmount_image v;
+	loff_t saved_obj;
+	char *path_buf, *path;
+
+	path_buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!path_buf)
+		return -ENOMEM;
+
+	path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
+	if (IS_ERR(path)) {
+		free_page((unsigned long) path_buf);
+		return PTR_ERR(path) == -EINVAL ? 0 : PTR_ERR(path);
+	}
+
+	cpt_open_object(NULL, ctx);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_VFSMOUNT;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_ARRAY;
+
+	v.cpt_mntflags = mnt->mnt_flags;
+	if (top_beancounter(slab_ub(mnt)) != top_beancounter(get_exec_ub())) {
+		v.cpt_mntflags |= CPT_MNT_EXT;
+	} else {
+		if (mnt->mnt_root != mnt->mnt_sb->s_root || loopy_root(mnt))
+			v.cpt_mntflags |= CPT_MNT_BIND;
+	}
+	v.cpt_flags = mnt->mnt_sb->s_flags;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	cpt_dump_string(mnt->mnt_devname ? : "none", ctx);
+	cpt_dump_string(path, ctx);
+	cpt_dump_string(mnt->mnt_sb->s_type->name, ctx);
+
+	if (v.cpt_mntflags & CPT_MNT_BIND)
+		err = cpt_dump_bind_mnt(mnt, ctx);
+	else if (!(v.cpt_mntflags & CPT_MNT_EXT) &&
+		   strcmp(mnt->mnt_sb->s_type->name, "tmpfs") == 0)
+		cpt_dump_tmpfs(path, ctx);
+
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+
+	if (mnt->mnt_sb->s_magic == FSMAGIC_VEFS)
+		vefs_track_force_stop(mnt->mnt_sb);
+
+	free_page((unsigned long) path_buf);
+
+	return err;
+}
+
+static int dump_one_namespace(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct namespace *n = obj->o_obj;
+	struct cpt_object_hdr v;
+	struct list_head *p;
+	loff_t saved_obj;
+	int err = 0;
+
+	cpt_open_object(obj, ctx);
+
+	v.cpt_next = -1;
+	v.cpt_object = CPT_OBJ_NAMESPACE;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_ARRAY;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+
+	down_read(&n->sem);
+	list_for_each(p, &n->list) {
+		err = dump_vfsmount(list_entry(p, struct vfsmount, mnt_list), ctx);
+		if (err)
+			break;
+	}
+	up_read(&n->sem);
+
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
+
+int cpt_dump_namespace(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_NAMESPACE);
+
+	for_each_object(obj, CPT_OBJ_NAMESPACE) {
+		int err;
+
+		if ((err = dump_one_namespace(obj, ctx)) != 0)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_files.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_files.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_files.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_files.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,58 @@
+int cpt_collect_files(cpt_context_t *);
+int cpt_collect_fs(cpt_context_t *);
+int cpt_collect_namespace(cpt_context_t *);
+int cpt_collect_sysvsem_undo(cpt_context_t *);
+int cpt_collect_tty(struct file *, cpt_context_t *);
+int cpt_dump_files(struct cpt_context *ctx);
+int cpt_dump_files_struct(struct cpt_context *ctx);
+int cpt_dump_fs_struct(struct cpt_context *ctx);
+int cpt_dump_content_sysvshm(struct file *file, struct cpt_context *ctx);
+int cpt_dump_content_tty(struct file *file, struct cpt_context *ctx);
+int cpt_dump_tty(cpt_object_t *, struct cpt_context *ctx);
+struct file * rst_sysv_shm(loff_t pos, struct cpt_context *ctx);
+struct file * rst_open_tty(struct cpt_file_image *fi, struct cpt_inode_image *ii, unsigned flags, struct cpt_context *ctx);
+__u32 cpt_tty_fasync(struct file *file, struct cpt_context *ctx);
+
+int rst_posix_locks(struct cpt_context *ctx);
+
+struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx);
+int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_restore_fs(struct cpt_context *ctx);
+
+int cpt_collect_sysv(cpt_context_t *);
+int cpt_dump_sysvsem(struct cpt_context *ctx);
+int cpt_dump_sysvmsg(struct cpt_context *ctx);
+int rst_sysv_ipc(struct cpt_context *ctx);
+int rst_semundo_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+__u32 rst_semundo_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+
+int cpt_dump_namespace(struct cpt_context *ctx);
+int rst_root_namespace(struct cpt_context *ctx);
+
+int rst_stray_files(struct cpt_context *ctx);
+int rst_tty_jobcontrol(struct cpt_context *ctx);
+
+void rst_flush_filejobs(struct cpt_context *);
+int rst_do_filejobs(struct cpt_context *);
+
+int rst_eventpoll(struct cpt_context *);
+struct file *cpt_open_epolldev(struct cpt_file_image *fi,
+			       unsigned flags,
+			       struct cpt_context *ctx);
+int cpt_dump_epolldev(cpt_object_t *obj, struct cpt_context *);
+
+int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
+			 cpt_context_t *ctx);
+
+#define check_one_vfsmount(mnt) \
+	(strcmp(mnt->mnt_sb->s_type->name, "rootfs") != 0 && \
+	strcmp(mnt->mnt_sb->s_type->name, "ext3") != 0 && \
+	strcmp(mnt->mnt_sb->s_type->name, "ext2") != 0 && \
+	strcmp(mnt->mnt_sb->s_type->name, "vzfs") != 0 && \
+	strcmp(mnt->mnt_sb->s_type->name, "simfs") != 0 && \
+	strcmp(mnt->mnt_sb->s_type->name, "tmpfs") != 0 && \
+	strcmp(mnt->mnt_sb->s_type->name, "devpts") != 0 && \
+	strcmp(mnt->mnt_sb->s_type->name, "proc") != 0 && \
+	strcmp(mnt->mnt_sb->s_type->name, "sysfs") != 0)
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_fsmagic.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_fsmagic.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_fsmagic.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_fsmagic.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,16 @@
+/* Collected from kernel sources. */
+
+#define FSMAGIC_TMPFS	0x01021994
+#define FSMAGIC_PIPEFS	0x50495045
+#define FSMAGIC_SOCKFS	0x534F434B
+#define FSMAGIC_PFMFS	0xa0b4d889
+#define FSMAGIC_BDEV	0x62646576
+#define FSMAGIC_EPOLL	0x03111965
+#define FSMAGIC_FUTEX	0x0BAD1DEA
+#define FSMAGIC_MQUEUE	0x19800202
+#define FSMAGIC_PROC	0x9fa0
+#define FSMAGIC_DEVPTS	0x1CD1
+#define FSMAGIC_AUTOFS	0x0187
+#define FSMAGIC_EXT2	0xEF53
+#define FSMAGIC_REISER	0x52654973
+#define FSMAGIC_VEFS	0x565a4653
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_iterative.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_iterative.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_iterative.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_iterative.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,441 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/errno.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/uio.h>
+#include <asm/ldt.h>
+#include <asm/mmu.h>
+#include <asm/tlb.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_pagein.h"
+
+struct iter_data
+{
+#define CPT_XFER_BATCH	64
+	int		xfer_nr;
+	struct page	*xfer_batch[CPT_XFER_BATCH];
+	int		iter_new;
+	int		iter_young;
+	int		iter;
+};
+
+/* Algo is the following:
+ * 
+ * 1. At the first iteration all appropriate pte's are marked COW,
+ *    pages are marked PG_checkpointed and transferred (indexed
+ *    by pfn).
+ * 2. do_wp_page(), if it wants to pte_mkwrite(), clears PG_checkpointed.
+ *    Also, PG_checkpointed is cleared, when a page is unmapped.
+ * 3. At the next iterations we check PG_checkpointed. If it is set,
+ *    we are lucky. If it is not, page is new or it was changed, so that
+ *    we send new copy.
+ * 4. Iterations stop when amount of new pages is < thresh_1 or it is
+ *    more than pages found at the first iteration / 2^N. So, we never
+ *    transfer more than 2*memsize.
+ * 5. Then we freeze VE.
+ * 6. cpt_mm, if it sees a page marked PG_checkpointed, sends its pfn.
+ *    (well, and panics, if pte is writable).
+ */
+
+static int add_to_xfer_list(struct page *pg, struct iter_data *iter,
+			    cpt_context_t *ctx)
+{
+	int slot = iter->xfer_nr;
+
+	BUG_ON(slot >= CPT_XFER_BATCH);
+	iter->xfer_batch[slot] = pg;
+	return ((iter->xfer_nr = slot + 1) == CPT_XFER_BATCH);
+}
+
+static int submit_page(struct page *pg, cpt_context_t *ctx)
+{
+	int err;
+	struct iovec iov[2];
+	struct file *file = ctx->pagein_file_out;
+	mm_segment_t oldfs;
+	struct pgin_reply rep;
+
+	rep.rmid = PGIN_RMID;
+	rep.error = 0;
+	rep.handle = page_to_pfn(pg);
+
+	iov[0].iov_base = &rep;
+	iov[0].iov_len = sizeof(rep);
+	iov[1].iov_base = kmap(pg);
+	iov[1].iov_len = PAGE_SIZE;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	err = file->f_op->writev(file, iov, 2, &file->f_pos);
+	set_fs(oldfs);
+	kunmap(pg);
+	if (err < 0)
+		return err;
+	if (err != sizeof(rep) + PAGE_SIZE)
+		return -EIO;
+	return 0;
+}
+
+static int flush_transfer(struct iter_data *iter, cpt_context_t *ctx)
+{
+	int err = 0;
+	int slot;
+
+	for (slot = 0; slot < iter->xfer_nr; slot++) {
+		struct page *pg = iter->xfer_batch[slot];
+		if (!err)
+			err = submit_page(pg, ctx);
+		page_cache_release(pg);
+	}
+	iter->xfer_nr = 0;
+	return err;
+}
+
+static inline int iter_one_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+			       unsigned long addr, unsigned long end,
+			       struct iter_data *iter, cpt_context_t *ctx)
+{
+	int err = 0;
+	pte_t *pte;
+	struct mm_struct *mm = vma->vm_mm;
+
+	spin_lock(&mm->page_table_lock);
+	pte = pte_offset_map(pmd, addr);
+	do {
+		pte_t ptent = *pte;
+		struct page *pg;
+		int retr = 0;
+
+retry:
+		if (pte_none(ptent))
+			continue;
+		if (!pte_present(*pte)) {
+			if (pte_file(ptent))
+				continue;
+
+			pte_unmap(pte);
+			spin_unlock(&mm->page_table_lock);
+			err = handle_mm_fault(mm, vma, addr, 0);
+			if (err == VM_FAULT_SIGBUS)
+				return -EFAULT;
+			if (err == VM_FAULT_OOM)
+				return -ENOMEM;
+			err = 0;
+			spin_lock(&mm->page_table_lock);
+			pte = pte_offset_map(pmd, addr);
+			ptent = *pte;
+			retr = 1;
+			goto retry;
+		}
+
+                if (!pfn_valid(pte_pfn(ptent)) ||
+		    (pg = pfn_to_page(pte_pfn(ptent))) == NULL ||
+		    !PageAnon(pg) ||
+		    PageReserved(pg) ||
+		    pg == ZERO_PAGE(addr))
+			continue;
+
+		if (iter->iter >= 0) {
+			if (ptep_test_and_clear_young(pte) && !retr)
+				iter->iter_young++;
+		}
+
+		if (iter->iter == 0) {
+			/* Just clear the state */
+			clear_bit(PG_checkpointed, &pg->flags);
+			iter->iter_new++;
+			continue;
+		}
+
+		if (test_bit(PG_checkpointed, &pg->flags)) {
+			if (pte_write(ptent)) {
+				eprintk("COW lost %lu %lu!\n", addr, page_to_pfn(pg));
+				pte_unmap(pte);
+				spin_unlock(&mm->page_table_lock);
+				return -EFAULT;
+			}
+			continue;
+		}
+
+		iter->iter_new++;
+		get_page(pg);
+		set_bit(PG_checkpointed, &pg->flags);
+		ptep_set_wrprotect(pte);
+		if (add_to_xfer_list(pg, iter, ctx)) {
+			pte_unmap(pte);
+			spin_unlock(&mm->page_table_lock);
+			err = flush_transfer(iter, ctx);
+			flush_tlb_range(vma, vma->vm_start, vma->vm_end);
+			if (err)
+				return err;
+			spin_lock(&mm->page_table_lock);
+			pte = pte_offset_map(pmd, addr);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	pte_unmap(pte-1);
+	spin_unlock(&mm->page_table_lock);
+
+	return err;
+}
+
+static inline int
+iter_one_pgd(struct vm_area_struct * vma, pgd_t *pgd,
+	     unsigned long addr, unsigned long end, struct iter_data *iter,
+	     cpt_context_t *ctx)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pgd, addr);
+	do {
+		int err;
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		err = iter_one_pmd(vma, pmd, addr, next, iter, ctx);
+		if (err)
+			return err;
+	} while (pmd++, addr = next, addr != end);
+	return 0;
+}
+
+static int iter_one_vma(struct iter_data *iter, struct vm_area_struct *vma,
+			task_t *tsk, cpt_context_t *ctx)
+{
+	pgd_t *pgd;
+	unsigned long addr, end, next;
+
+	addr = vma->vm_start;
+	end = vma->vm_end;
+
+	pgd = pgd_offset(vma->vm_mm, addr);
+	do {
+		int err;
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		err = iter_one_pgd(vma, pgd, addr, next, iter, ctx);
+		if (err)
+			return err;
+	} while (pgd++, addr = next, addr != end);
+	return 0;
+}
+
+static int iter_one_mm(task_t *tsk, struct mm_struct *mm,
+		       void *data, cpt_context_t *ctx)
+{
+	int err = 0, err2 = 0;
+	struct iter_data *iter = data;
+	struct vm_area_struct *vma;
+
+	/* OK, now we are going to scan VM */
+	down_read(&mm->mmap_sem);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		/* Do only true simple anonymous VMAs. */
+		if (!vma->anon_vma)
+			continue;
+		if (is_vm_hugetlb_page(vma))
+			continue;
+		if ((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE)
+			continue;
+		err = iter_one_vma(iter, vma, tsk, ctx);
+		if (iter->xfer_nr) {
+			flush_tlb_range(vma, vma->vm_start, vma->vm_end);
+			if (iter->iter)
+				err2 = flush_transfer(iter, ctx);
+		}
+		if (err || err2)
+			break;
+	}
+	up_read(&mm->mmap_sem);
+	return err ? : err2;
+}
+
+int cpt_walk_mm(int (*doit)(task_t *tsk, struct mm_struct *mm,
+			  void *data, cpt_context_t *ctx),
+		void *data,
+		cpt_context_t *ctx)
+{
+	int err = 0;
+	task_t *p;
+	struct ve_struct *env;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (env == NULL)
+		return -ESRCH;
+
+	write_lock_irq(&tasklist_lock);
+
+	do {
+		struct mm_struct *mm;
+
+		/* VE is empty, stop scanning. */
+		if (list_empty(&env->vetask_lh))
+			break;
+
+		p = VE_TASK_LIST_2_TASK(env->vetask_lh.prev);
+		REMOVE_VE_LINKS(p);
+		list_add(&VE_TASK_INFO(p)->vetask_list, &env->vetask_lh);
+
+		get_task_struct(p);
+		write_unlock_irq(&tasklist_lock);
+
+		mm = get_task_mm(p);
+		if (mm) {
+			err = doit(p, mm, data, ctx);
+			mmput(mm);
+		}
+
+		put_task_struct(p);
+
+		cond_resched();
+
+		write_lock_irq(&tasklist_lock);
+		if (err)
+			break;
+	} while (p != env->init_entry);
+
+	write_unlock_irq(&tasklist_lock);
+
+	put_ve(env);
+
+	return err;
+}
+
+static int nread(struct file *file, void *buf, int len)
+{
+	int offset = 0;
+
+	while (offset < len) {
+		int res;
+		mm_segment_t oldfs;
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		res = vfs_read(file, buf+offset, len-offset, &file->f_pos);
+		set_fs(oldfs);
+		if (res < 0)
+			return res;
+		if (res == 0)
+			return -EIO;
+		offset += res;
+	}
+	return 0;
+}
+
+int cpt_iteration(cpt_context_t *ctx)
+{
+	int err;
+	int prev_iter, first_iter, prev_young;
+	struct iter_data *iter;
+	int tmo;
+
+	if (ctx->pagein_file_out == NULL ||
+	    ctx->pagein_file_out->f_op->writev == NULL)
+		return -EBADF;
+
+	iter = kmalloc(sizeof(struct iter_data), GFP_KERNEL);
+	if (iter == NULL)
+		return -ENOMEM;
+	memset(iter, 0, sizeof(struct iter_data));
+
+	/* Clear the state */ 
+	cpt_walk_mm(iter_one_mm, iter, ctx);
+	
+	iter->iter_new = 0;
+	iter->iter_young = 0;
+	iter->iter = 1;
+	err = cpt_walk_mm(iter_one_mm, iter, ctx);
+	prev_iter = first_iter = iter->iter_new;
+	prev_young = iter->iter_young;
+	dprintk_ctx("%d: Found %d pages, %d young\n", iter->iter, prev_iter, iter->iter_young);
+	iter->iter_new = 0;
+	iter->iter_young= 0;
+	if (err)
+		goto out;
+
+	tmo = HZ/20;
+
+	for (;;) {
+		iter->iter++;
+		current->state = TASK_UNINTERRUPTIBLE;
+		schedule_timeout(tmo);
+		err = cpt_walk_mm(iter_one_mm, iter, ctx);
+		if (err)
+			break;
+		dprintk_ctx("%d: Found %d pages, %d young, %d tmo\n", iter->iter, iter->iter_new, iter->iter_young, tmo);
+		if (iter->iter_new > prev_iter/2 ||
+		    iter->iter_young > prev_young/2) {
+			tmo /= 2;
+			if (tmo < 2)
+				tmo = 2;
+		}
+		if (iter->iter_new > first_iter/2 ||
+		    iter->iter_new < 10 ||
+		    iter->iter > 10) {
+			current->state = TASK_UNINTERRUPTIBLE;
+			schedule_timeout(tmo/2);
+			iter->iter = -1;
+			prev_iter = iter->iter_new;
+			iter->iter_new = 0;
+			cpt_walk_mm(iter_one_mm, iter, ctx);
+			dprintk_ctx("%d: Found %d pages, tmo %d\n", iter->iter, iter->iter_new, tmo);
+			ctx->iter_done = 1;
+			do {
+				union {
+					struct pgin_reply rep;
+					struct pgin_request req;
+				} u;
+				mm_segment_t oldfs;
+				struct file * file = ctx->pagein_file_out;
+
+				u.rep.rmid = PGIN_RMID;
+				u.rep.error = ITER_STOP;
+				u.rep.handle = 0;
+
+				oldfs = get_fs(); set_fs(KERNEL_DS);
+				vfs_write(file, (void*)&u.rep, sizeof(u.rep), &file->f_pos);
+				err = nread(ctx->pagein_file_in, &u.req, sizeof(u.req));
+				set_fs(oldfs);
+				if (!err) {
+					if (u.req.rmid != PGIN_RMID ||
+					    u.req.size != PGIN_STOP)
+						err = -EIO;
+				}
+			} while (0);
+			break;
+		}
+		prev_iter = iter->iter_new;
+		prev_young = iter->iter_young;
+		first_iter /= 2;
+		iter->iter_new = 0;
+		iter->iter_young= 0;
+	}
+
+out:
+	if (err) {
+		if (ctx->pagein_file_out) {
+			fput(ctx->pagein_file_out);
+			ctx->pagein_file_out = NULL;
+		}
+		if (ctx->pagein_file_in) {
+			fput(ctx->pagein_file_in);
+			ctx->pagein_file_in = NULL;
+		}
+	}
+	kfree(iter);
+	return err;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_kernel.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_kernel.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_kernel.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_kernel.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,144 @@
+#define __KERNEL_SYSCALLS__ 1
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <asm/cpufeature.h>
+#include <linux/cpt_image.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+
+#include "cpt_kernel.h"
+#include "cpt_syscalls.h"
+
+#ifndef CONFIG_X86_64
+
+extern void local_kernel_thread_helper(void);
+__asm__(".section .text\n"
+	".align 4\n"
+	"local_kernel_thread_helper:\n\t"
+	"movl %edx,%eax\n\t"
+	"pushl %edx\n\t"
+	"call *%ebx\n\t"
+	"pushl %eax\n\t"
+	"pushl $0\n\t"
+	"call complete_and_exit\n"
+	".previous");
+
+/*
+ * Create a kernel thread
+ */
+int asm_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid)
+{
+	struct pt_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+
+	regs.ebx = (unsigned long) fn;
+	regs.edx = (unsigned long) arg;
+
+	regs.xds = __USER_DS;
+	regs.xes = __USER_DS;
+	regs.orig_eax = -1;
+	regs.eip = (unsigned long) local_kernel_thread_helper;
+	regs.xcs = __KERNEL_CS;
+	regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
+
+	/* Ok, create the new process.. */
+	return do_fork_pid(flags | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL, pid);
+}
+#endif
+
+int local_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid)
+{
+	pid_t ret;
+
+	if (current->fs == NULL) {
+		/* do_fork_pid() hates processes without fs, oopses. */
+		printk("CPT BUG: local_kernel_thread: current->fs==NULL\n");
+		return -EINVAL;
+	}
+	if (!try_module_get(THIS_MODULE))
+		return -EBUSY;
+	ret = asm_kernel_thread(fn, arg, flags, pid);
+	if (ret < 0)
+		module_put(THIS_MODULE);
+	return ret;
+}
+
+#ifdef __i386__
+int __execve(const char *file, char **argv, char **envp)
+{
+	long res;
+	__asm__ volatile ("int $0x80"
+			: "=a" (res)
+			: "0" (__NR_execve),"b" ((long)(file)),"c" ((long)(argv)),
+			"d" ((long)(envp)) : "memory");
+	return (int)res;
+}
+#endif
+
+int sc_execve(char *cmd, char **argv, char **env)
+{
+	int ret;
+#ifndef __i386__
+	ret = execve(cmd, argv, env);
+#else
+	ret = __execve(cmd, argv, env);
+#endif
+	return ret;
+}
+
+unsigned int test_cpu_caps_and_features()
+{
+	unsigned int flags = 0;
+	if (boot_cpu_has(X86_FEATURE_CMOV))
+		flags |= 1 << CPT_CPU_X86_CMOV;
+	if (cpu_has_fxsr)
+		flags |= 1 << CPT_CPU_X86_FXSR;
+	if (cpu_has_xmm)
+		flags |= 1 << CPT_CPU_X86_SSE;
+#ifndef CONFIG_X86_64
+	if (cpu_has_xmm2)
+#endif
+		flags |= 1 << CPT_CPU_X86_SSE2;
+	if (cpu_has_mmx)
+		flags |= 1 << CPT_CPU_X86_MMX;
+	if (boot_cpu_has(X86_FEATURE_3DNOW))
+		flags |= 1 << CPT_CPU_X86_3DNOW;
+	if (boot_cpu_has(X86_FEATURE_3DNOWEXT))
+		flags |= 1 << CPT_CPU_X86_3DNOW2;
+	if (boot_cpu_has(X86_FEATURE_SEP))
+		flags |= 1 << CPT_CPU_X86_SEP;
+	if (boot_cpu_has(X86_FEATURE_SYSCALL))
+		flags |= 1 << CPT_CPU_X86_SYSCALL;
+#ifdef CONFIG_X86_64
+	if (boot_cpu_has(X86_FEATURE_SYSCALL) &&
+			boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		flags |= 1 << CPT_CPU_X86_SYSCALL32;
+#endif
+	if (boot_cpu_has(X86_FEATURE_SEP)
+#ifdef CONFIG_X86_64
+			&& boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
+#endif
+			)
+		flags |= 1 << CPT_CPU_X86_SEP32;
+#ifdef CONFIG_X86_64
+	flags |= 1 << CPT_CPU_X86_EMT64;
+#endif
+	if (virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_TEST, NULL) & NOTIFY_FAIL)
+		flags |= 1 << CPT_SLM_DMPRST;
+	return flags;
+}
+
+unsigned int test_kernel_config()
+{
+	unsigned int flags = 0;
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
+	flags |= 1 << CPT_KERNEL_CONFIG_PAE;
+#endif
+	return flags;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_kernel.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_kernel.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_kernel.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_kernel.h	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,159 @@
+/* Interface to kernel vars which we had to _add_. */
+
+asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+
+
+struct env_create_param2;
+int		real_env_create(envid_t veid, unsigned flags, u32 class_id,
+				struct env_create_param2 *data, int datalen);
+
+struct ve_struct *get_ve_by_id(envid_t veid);
+
+#if defined(CONFIG_VZFS_FS) || defined(CONFIG_VZFS_FS_MODULE)
+void vefs_track_force_stop(struct super_block *super);
+
+void vefs_track_notify(struct dentry *vdentry, int track_cow);
+
+struct dentry * vefs_replaced_dentry(struct dentry *de);
+int vefs_is_renamed_dentry(struct dentry *vde, struct dentry *pde);
+#else
+static inline void vefs_track_force_stop(struct super_block *super) { };
+
+static inline void vefs_track_notify(struct dentry *vdentry, int track_cow) { };
+#endif
+
+int __copy_page_range(struct vm_area_struct *vma, struct mm_struct *src,
+		      unsigned long address, size_t size);
+
+long do_fork_pid(unsigned long clone_flags,
+	      unsigned long stack_start,
+	      struct pt_regs *regs,
+	      unsigned long stack_size,
+	      int __user *parent_tidptr,
+	      int __user *child_tidptr,
+	      long pid0);
+
+void wait_for_all_aios(struct kioctx *ctx);
+extern kmem_cache_t	*kioctx_cachep;
+extern void aio_kick_handler(void *);
+
+struct shmid_kernel;
+struct msg_queue;
+struct sem_array;
+
+int sysvipc_walk_msg(int (*func)(int, struct msg_queue*, void *), void *arg);
+int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg);
+struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg);
+int sysvipc_walk_sem(int (*func)(int, struct sem_array*, void *), void *arg);
+int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg);
+
+#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
+
+extern long __sched nanosleep_restart(struct restart_block *restart);
+extern long compat_nanosleep_restart(struct restart_block *restart);
+
+extern long do_fork_pid(unsigned long clone_flags,
+	      unsigned long stack_start,
+	      struct pt_regs *regs,
+	      unsigned long stack_size,
+	      int __user *parent_tidptr,
+	      int __user *child_tidptr,
+	      long pid);
+
+extern kmem_cache_t *sigqueue_cachep;
+
+extern struct or_calltable or_ipv4;
+extern struct or_calltable or_ipv6;
+extern struct tcp_func ipv6_mapped;
+
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
+#define TASK_TRACED TASK_STOPPED
+#define unix_peer(sk) ((sk)->sk_pair)
+#define page_mapcount(pg) ((pg)->mapcount)
+#else
+#define unix_peer(sk) (unix_sk(sk)->peer)
+#endif
+
+#ifdef CONFIG_X86_64
+#define cpu_has_fxsr 1
+#endif
+
+static inline void do_gettimespec(struct timespec *ts)
+{
+	struct timeval tv;
+	do_gettimeofday(&tv);
+	ts->tv_sec = tv.tv_sec;
+	ts->tv_nsec = tv.tv_usec*1000;
+}
+
+int local_kernel_thread(int (*fn)(void *),
+		void * arg,
+		unsigned long flags,
+		pid_t pid);
+int asm_kernel_thread(int (*fn)(void *),
+		void * arg,
+		unsigned long flags,
+		pid_t pid);
+
+unsigned int test_cpu_caps_and_features(void);
+unsigned int test_kernel_config(void);
+
+#define test_one_flag_old(src, dst, flag, message, ret) \
+if (src & (1 << flag)) \
+	if (!(dst & (1 << flag))) { \
+		wprintk("Destination cpu does not have " message "\n"); \
+		ret = 1; \
+	}
+#define test_one_flag(src, dst, flag, message, ret) \
+if (src & (1 << flag)) \
+	if (!(dst & (1 << flag))) { \
+		eprintk_ctx("Destination cpu does not have " message "\n"); \
+		ret = 1; \
+	}
+
+#include <asm/div64.h>
+
+static inline long div_long_long_rem_signed(const long long dividend,
+					    const long divisor, long *remainder)
+{
+	long res;
+
+	if (unlikely(dividend < 0)) {
+		res = -div_long_long_rem(-dividend, divisor, remainder);
+		*remainder = -(*remainder);
+	} else
+		res = div_long_long_rem(dividend, divisor, remainder);
+
+	return res;
+}
+
+static inline struct timespec _ns_to_timespec(const s64 nsec)
+{
+	struct timespec ts;
+
+	if (!nsec)
+		return (struct timespec) {0, 0};
+
+	ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec);
+	if (unlikely(nsec < 0))
+		set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec);
+
+	return ts;
+}
+
+static inline long _ns_to_jiffies(s64 nsec)
+{
+	long jif, rem;
+
+#if BITS_PER_LONG == 32
+	if (nsec > (s64)(MAX_SCHEDULE_TIMEOUT-1) * TICK_NSEC)
+		nsec = (s64)(MAX_SCHEDULE_TIMEOUT-1) * TICK_NSEC;
+	else if (nsec < -(s64)(MAX_SCHEDULE_TIMEOUT-1) * TICK_NSEC)
+		nsec = -(s64)(MAX_SCHEDULE_TIMEOUT-1) * TICK_NSEC;
+#endif
+
+	jif = div_long_long_rem_signed(nsec, TICK_NSEC, &rem);
+
+	return jif;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_mm.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_mm.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_mm.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_mm.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,1018 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/errno.h>
+#include <linux/ve.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <asm/ldt.h>
+#include <asm/mmu.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_pagein.h"
+#include "cpt_ubc.h"
+
+static int collect_one_mm(struct mm_struct *mm, cpt_context_t * ctx)
+{
+	struct vm_area_struct *vma;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->vm_file) {
+			if (cpt_object_add(CPT_OBJ_FILE, vma->vm_file, ctx) == NULL)
+				return -ENOMEM;
+		}
+	}
+	if (cpt_add_ubc(mm_ub(mm), ctx) == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int cpt_collect_mm(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+	int err;
+	int index;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		if (tsk->mm && cpt_object_add(CPT_OBJ_MM, tsk->mm, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	index = 1;
+	for_each_object(obj, CPT_OBJ_MM) {
+		struct mm_struct *mm = obj->o_obj;
+		if (obj->o_count != atomic_read(&mm->mm_users)) {
+			eprintk_ctx("mm_struct is referenced outside %d %d\n", obj->o_count, atomic_read(&mm->mm_users));
+			return -EAGAIN;
+		}
+		cpt_obj_setindex(obj, index++, ctx);
+
+		if ((err = collect_one_mm(mm, ctx)) != 0)
+			return err;
+	}
+
+	return 0;
+}
+
+
+#if 0
+static int page_is_zero(struct mm_struct *mm, struct page *page)
+{
+	char *maddr;
+	int i;
+
+	page_cache_get(page);
+	spin_unlock(&mm->page_table_lock);
+
+	maddr = kmap(page);
+	for (i=0; i<PAGE_SIZE/sizeof(unsigned long); i++) {
+		if (((unsigned long*)(maddr))[i])
+			break;
+	}
+	kunmap(page);
+
+	page_cache_release(page);
+	spin_lock(&mm->page_table_lock);
+	return (i == PAGE_SIZE/sizeof(unsigned long));
+}
+#endif
+
+static int zcnt, scnt, scnt0, ucnt;
+
+/* Function where_is_anon_page() returns address of an anonymous page in mm
+ * of already dumped process. This happens f.e. after fork(). We do not use
+ * this right now, just keep statistics, it is difficult to restore such state,
+ * but the most direct use is to save space in dumped image. */
+
+static struct anon_vma *page_lock_anon_vma(struct page *page)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+	struct anon_vma *anon_vma = NULL;
+	unsigned long anon_mapping;
+
+	rcu_read_lock();
+	anon_mapping = (unsigned long) page->mapping;
+	if (!(anon_mapping & PAGE_MAPPING_ANON))
+		goto out;
+	if (!page_mapped(page))
+		goto out;
+
+	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
+	spin_lock(&anon_vma->lock);
+out:
+	rcu_read_unlock();
+#else
+	struct anon_vma *anon_vma = (struct anon_vma *) page->mapping;
+
+	spin_lock(&anon_vma->lock);
+#endif
+	return anon_vma;
+}
+
+static inline unsigned long
+vma_address0(struct page *page, struct vm_area_struct *vma)
+{
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	unsigned long address;
+
+	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+		address |= 1;
+	return address;
+}
+
+static int really_this_one(struct vm_area_struct *vma, unsigned long address,
+			   struct page *page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	int result;
+
+	pgd = pgd_offset(mm, address);
+	if (unlikely(!pgd_present(*pgd)))
+		return 0;
+
+	pmd = pmd_offset(pgd, address);
+	if (unlikely(!pmd_present(*pmd)))
+		return 0;
+
+	result = 0;
+	pte = pte_offset_map(pmd, address);
+	if (pte_present(*pte) &&
+	    page_to_pfn(page) == pte_pfn(*pte))
+		result = 1;
+	pte_unmap(pte);
+	return result;
+}
+
+static loff_t where_is_anon_page(cpt_object_t *mmobj, unsigned long mapaddr,
+				 struct page *page, cpt_context_t * ctx)
+{
+	loff_t mmptr = CPT_NULL;
+	struct anon_vma *anon_vma;
+	struct vm_area_struct *vma;
+	int idx = mmobj->o_index;
+
+	if (!PageAnon(page))
+		return CPT_NULL;
+
+	anon_vma = page_lock_anon_vma(page);
+	if (!anon_vma)
+		return CPT_NULL;
+
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		unsigned long addr = vma_address0(page, vma);
+		cpt_object_t *obj;
+
+		/* We do not try to support mremapped regions (addr != mapaddr),
+		 * only mmaps directly inherited via fork().
+		 * With this limitation we may check self-consistency of
+		 * vmas (vm_start, vm_pgoff, anon_vma) before
+		 * doing __copy_page_range() in rst_mm.
+		 */
+		if (mmobj->o_obj != vma->vm_mm && addr == mapaddr) {
+			obj = lookup_cpt_object(CPT_OBJ_MM, vma->vm_mm, ctx);
+			if (obj && obj->o_pos != CPT_NULL && obj->o_index < idx) {
+				if (spin_trylock(&vma->vm_mm->page_table_lock)) {
+					if (really_this_one(vma, addr, page)) {
+						mmptr = obj->o_pos;
+						idx = obj->o_index;
+					}
+					spin_unlock(&vma->vm_mm->page_table_lock);
+				}
+			}
+		}
+	}
+	spin_unlock(&anon_vma->lock);
+
+	return mmptr;
+}
+
+struct page_area
+{
+	int type;
+	unsigned long start;
+	unsigned long end;
+	pgoff_t pgoff;
+	loff_t mm;
+	__u64 list[16];
+};
+
+struct page_desc
+{
+	int	type;
+	pgoff_t	index;
+	loff_t	mm;
+	int	shared;
+};
+
+enum {
+	PD_ABSENT,
+	PD_COPY,
+	PD_ZERO,
+	PD_CLONE,
+	PD_FUNKEY,
+	PD_LAZY,
+	PD_ITER,
+	PD_ITERYOUNG,
+};
+
+/* 0: page can be obtained from backstore, or still not mapped anonymous page,
+      or something else, which does not require copy.
+   1: page requires copy
+   2: page requires copy but its content is zero. Quite useless.
+   3: wp page is shared after fork(). It is to be COWed when modified.
+   4: page is something unsupported... We copy it right now.
+ */
+
+
+
+static void page_get_desc(cpt_object_t *mmobj,
+			  struct vm_area_struct *vma, unsigned long addr,
+			  struct page_desc *pdesc, cpt_context_t * ctx)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	pgoff_t linear_index = (addr - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff;
+
+	pdesc->index = linear_index;
+	pdesc->shared = 0;
+
+	if (vma->vm_flags & VM_IO) {
+		pdesc->type = PD_ABSENT;
+		return;
+	}
+
+	spin_lock(&mm->page_table_lock);
+	pgd = pgd_offset(mm, addr);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		goto out_absent;
+	pmd = pmd_offset(pgd, addr);
+	if (pmd_none(*pmd))
+		goto out_absent;
+	if (pmd_huge(*pmd)) {
+		eprintk_ctx("page_huge\n");
+		goto out_unsupported;
+	}
+	if (unlikely(pmd_bad(*pmd)))
+		goto out_absent;
+retry:
+	ptep = pte_offset_map(pmd, addr);
+	if (!ptep)
+		goto out_absent;
+	pte = *ptep;
+	pte_unmap(ptep);
+	if (pte_none(pte))
+		goto out_absent;
+
+	if (!pte_present(pte)) {
+		if (pte_file(pte)) {
+			pdesc->index = pte_to_pgoff(pte);
+			goto out_absent;
+		}
+		if (vma->vm_flags & VM_SHARED) {
+			/* It is impossible: shared mappings cannot be in swap */
+			eprintk_ctx("shared mapping is not present: %08lx@%Ld\n", addr, mmobj->o_pos);
+			goto out_unsupported;
+		}
+
+		/* Otherwise it is in swap. */
+		if (!ctx->lazy_vm) {
+			int err;
+			/* If lazy transfer is not enabled,
+			 * raise it from swap now, so that we
+			 * save at least when the page is shared.
+			 */
+			spin_unlock(&mm->page_table_lock);
+			err = handle_mm_fault(mm, vma, addr, 0);
+			if (err == VM_FAULT_SIGBUS)
+				goto out_absent;
+			if (err == VM_FAULT_OOM)
+				goto out_absent;
+			err = 0;
+			spin_lock(&mm->page_table_lock);
+			goto retry;
+		}
+		goto out_lazy;
+	} else {
+		unsigned long pfn;
+		struct page *pg;
+
+		pfn = pte_pfn(pte);
+		if (pfn_valid(pfn) && (pg = pfn_to_page(pfn)) != NULL) {
+			if (pg->mapping && !PageAnon(pg)) {
+				if (vma->vm_file == NULL) {
+					eprintk_ctx("pg->mapping!=NULL for fileless vma: %08lx\n", addr);
+					goto out_unsupported;
+				}
+				if (vma->vm_file->f_mapping != pg->mapping) {
+					eprintk_ctx("pg->mapping!=f_mapping: %08lx %p %p %Ld\n", addr, vma->vm_file->f_mapping, pg->mapping, mmobj->o_pos);
+					goto out_unsupported;
+				}
+				pdesc->index = (pg->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT));
+				/* Page is in backstore. For us it is like
+				 * it is not present.
+				 */
+				goto out_absent;
+			}
+
+			if (PageReserved(pg)) {
+				/* Special case: ZERO_PAGE is used, when an
+				 * anonymous page is accessed but not written. */
+				if (pg == ZERO_PAGE(addr)) {
+					if (pte_write(pte)) {
+						eprintk_ctx("not funny already, writable ZERO_PAGE\n");
+						goto out_unsupported;
+					}
+					zcnt++;
+					goto out_absent;
+				}
+				eprintk_ctx("reserved page %lu at %08lx@%Ld\n", pg->index, addr, mmobj->o_pos);
+				goto out_unsupported;
+			}
+
+			if (!pg->mapping) {
+				eprintk_ctx("page without mapping at %08lx@%Ld\n", addr, mmobj->o_pos);
+				goto out_unsupported;
+			}
+
+			if (pg->mapping && page_mapcount(pg) > 1) {
+				pdesc->shared = 1;
+				pdesc->mm = where_is_anon_page(mmobj, addr, pg, ctx);
+				if (pdesc->mm != CPT_NULL) {
+					scnt0++;
+					goto out_clone;
+				} else {
+					scnt++;
+				}
+			}
+
+			if (ctx->iter_done &&
+			    test_bit(PG_checkpointed, &pg->flags)) {
+				if (pte_write(pte)) {
+					wprintk_ctx("writable PG_checkpointed page\n");
+				}
+				pdesc->index = page_to_pfn(pg);
+				goto out_iter;
+			}
+
+			if (!pte_young(pte))
+				goto out_lazy;
+		}
+	}
+	spin_unlock(&mm->page_table_lock);
+	pdesc->type = PD_COPY;
+	return;
+
+out_iter:
+	pdesc->type = pte_young(pte) ? PD_ITERYOUNG : PD_ITER;
+	spin_unlock(&mm->page_table_lock);
+	return;
+
+out_lazy:
+	spin_unlock(&mm->page_table_lock);
+	pdesc->type = PD_LAZY;
+	return;
+
+out_absent:
+	spin_unlock(&mm->page_table_lock);
+	pdesc->type = PD_ABSENT;
+	return;
+
+out_clone:
+	spin_unlock(&mm->page_table_lock);
+	pdesc->type = PD_CLONE;
+	return;
+
+out_unsupported:
+	spin_unlock(&mm->page_table_lock);
+	ucnt++;
+	pdesc->type = PD_FUNKEY;
+	return;
+}
+
+/* ATTN: We give "current" to get_user_pages(). This is wrong, but get_user_pages()
+ * does not really need this thing. It just stores some page fault stats there.
+ *
+ * BUG: some archs (f.e. sparc64, but not Intel*) require flush cache pages
+ * before accessing vma.
+ */
+void dump_pages(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end, struct cpt_context *ctx)
+{
+#define MAX_PAGE_BATCH 16
+	struct page *pg[MAX_PAGE_BATCH];
+	int npages = (end - start)/PAGE_SIZE;
+	int count = 0;
+
+	while (count < npages) {
+		int copy = npages - count;
+		int n;
+
+		if (copy > MAX_PAGE_BATCH)
+			copy = MAX_PAGE_BATCH;
+		n = get_user_pages(current, vma->vm_mm, start, copy,
+				   0, 1, pg, NULL);
+		if (n == copy) {
+			int i;
+			for (i=0; i<n; i++) {
+				char *maddr = kmap(pg[i]);
+				ctx->write(maddr, PAGE_SIZE, ctx);
+				kunmap(pg[i]);
+			}
+		} else {
+			eprintk_ctx("get_user_pages fault");
+			for ( ; n > 0; n--)
+				page_cache_release(pg[n-1]);
+			return;
+		}
+		start += n*PAGE_SIZE;
+		count += n;
+		for ( ; n > 0; n--)
+			page_cache_release(pg[n-1]);
+	}
+	return;
+}
+
+int dump_page_block(struct vm_area_struct *vma, struct cpt_page_block *pgb,
+		    int copy,
+		    struct cpt_context *ctx)
+{
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb->cpt_object = (copy != PD_LAZY) ? CPT_OBJ_PAGES : CPT_OBJ_LAZYPAGES;
+	pgb->cpt_hdrlen = sizeof(*pgb);
+	pgb->cpt_content = (copy == PD_COPY || copy == PD_LAZY) ? CPT_CONTENT_DATA : CPT_CONTENT_VOID;
+
+	ctx->write(pgb, sizeof(*pgb), ctx);
+	if (copy == PD_COPY || copy == PD_LAZY)
+		dump_pages(vma, pgb->cpt_start, pgb->cpt_end, ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int dump_remappage_block(struct vm_area_struct *vma, struct page_area *pa,
+			 struct cpt_context *ctx)
+{
+	struct cpt_remappage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = CPT_OBJ_REMAPPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+	pgb.cpt_pgoff = pa->pgoff - (pa->end-pa->start)/PAGE_SIZE + 1;
+
+	ctx->write(&pgb, sizeof(pgb), ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int dump_copypage_block(struct vm_area_struct *vma, struct page_area *pa,
+			struct cpt_context *ctx)
+{
+	struct cpt_copypage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = CPT_OBJ_COPYPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+	pgb.cpt_source = pa->mm;
+
+	ctx->write(&pgb, sizeof(pgb), ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int alloc_pgin_index(struct vm_area_struct *vma, unsigned long addr, int npg,
+		     cpt_context_t *ctx)
+{
+	u32 index, i;
+	struct pagein_desc **dir = ctx->pgin_dir;
+
+	index = ctx->lazypages;
+	ctx->lazypages += npg;
+
+	if (unlikely(dir == NULL)) {
+		ctx->pgin_dir = (struct pagein_desc**)vmalloc(PGINDIR_SIZE);
+		if (ctx->pgin_dir == NULL)
+			return -ENOMEM;
+		memset(ctx->pgin_dir, 0, PGINDIR_SIZE);
+		dir = ctx->pgin_dir;
+	}
+
+	for (i = index; i < index + npg; i++) {
+		u32 page_nr = i/IDX_PER_PAGE;
+
+		if (page_nr >= PGINDIR_SIZE/sizeof(struct pagein_desc*)) {
+			eprintk_ctx("pgin swap space overflow\n");
+			return -ENOSPC;
+		}
+
+		if (unlikely(dir[page_nr] == NULL)) {
+			dir[page_nr] = (struct pagein_desc*)__get_free_page(GFP_KERNEL);
+			if (dir[page_nr] == NULL)
+				return -ENOMEM;
+			memset(dir[page_nr], 0, PAGE_SIZE);
+		}
+
+		dir[page_nr][i%IDX_PER_PAGE].mm = vma->vm_mm;
+		dir[page_nr][i%IDX_PER_PAGE].offset = addr;
+		addr += PAGE_SIZE;
+	}
+
+	return index;
+}
+
+int dump_lazypage_block(struct vm_area_struct *vma, struct page_area *pa,
+			cpt_context_t *ctx)
+{
+	struct cpt_lazypage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = CPT_OBJ_LAZYPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+	pgb.cpt_index = alloc_pgin_index(vma, pa->start,
+					 (pa->end-pa->start)/PAGE_SIZE, ctx);
+	ctx->write(&pgb, sizeof(pgb), ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+
+int dump_iterpage_block(struct vm_area_struct *vma, struct page_area *pa,
+			cpt_context_t *ctx)
+{
+	struct cpt_iterpage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = pa->type == PD_ITER ? CPT_OBJ_ITERPAGES :
+		CPT_OBJ_ITERYOUNGPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+	ctx->write(&pgb, sizeof(pgb), ctx);
+
+	ctx->write(pa->list, 8*((pa->end-pa->start)/PAGE_SIZE), ctx);
+
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+
+static int can_expand(struct page_area *pa, struct page_desc *pd)
+{
+	if (pa->start == pa->end)
+		return 1;
+	if (pa->type != pd->type)
+		return 0;
+	if (pa->type == PD_ITER || pa->type == PD_ITERYOUNG) {
+		if (pa->end - pa->start >= PAGE_SIZE*16)
+			return 0;
+		pa->list[(pa->end - pa->start)/PAGE_SIZE] = pd->index;
+	}
+	if (pa->type == PD_ABSENT)
+		return pd->index == pa->pgoff + 1;
+	if (pa->type == PD_CLONE)
+		return pd->mm == pa->mm;
+	return 1;
+}
+
+static int dump_one_vma(cpt_object_t *mmobj,
+			struct vm_area_struct *vma, struct cpt_context *ctx)
+{
+	struct cpt_vma_image *v = cpt_get_buf(ctx);
+	unsigned long addr;
+	loff_t saved_object;
+	struct cpt_page_block pgb;
+	struct page_area pa;
+	int cloned_pages = 0;
+
+	cpt_push_object(&saved_object, ctx);
+
+	v->cpt_object = CPT_OBJ_VMA;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_start = vma->vm_start;
+	v->cpt_end = vma->vm_end;
+	v->cpt_flags = vma->vm_flags;
+	if (vma->vm_flags&VM_HUGETLB) {
+		eprintk_ctx("huge TLB VMAs are still not supported\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	v->cpt_pgprot = vma->vm_page_prot.pgprot;
+	v->cpt_pgoff = vma->vm_pgoff;
+	v->cpt_file = CPT_NULL;
+	if ((void *)vma->vm_start == vma->vm_mm->context.vdso &&
+			vma->vm_ops == &vsyscall_vm_ops)
+		v->cpt_type = CPT_VMA_VDSO;
+	else
+		v->cpt_type = CPT_VMA_TYPE_0;
+	v->cpt_anonvma = 0;
+
+	/* We have to remember what VMAs are bound to one anon_vma.
+	 * So, we store an identifier of group of VMAs. It is handy
+	 * to use absolute address of anon_vma as this identifier. */
+	v->cpt_anonvmaid = (unsigned long)vma->anon_vma;
+
+	if (vma->vm_file) {
+		struct file *filp;
+		cpt_object_t *obj = lookup_cpt_object(CPT_OBJ_FILE, vma->vm_file, ctx);
+		if (obj == NULL) BUG();
+		filp = obj->o_obj;
+		if (filp->f_op &&
+		    filp->f_op->read == NULL &&
+		    filp->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_TMPFS)
+			v->cpt_type = CPT_VMA_TYPE_SHM;
+		v->cpt_file = obj->o_pos;
+	}
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	if (v->cpt_type == CPT_VMA_VDSO)
+		goto out;
+
+	pa.type = PD_ABSENT;
+	pa.pgoff = vma->vm_pgoff;
+	pa.mm = CPT_NULL;
+	pa.start = vma->vm_start;
+	pa.end = vma->vm_start;
+
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+		struct page_desc pd;
+
+		page_get_desc(mmobj, vma, addr, &pd, ctx);
+		cloned_pages += pd.shared;
+
+		if (pd.type == PD_FUNKEY) {
+			eprintk_ctx("dump_one_vma: funkey page\n");
+			return -EINVAL;
+		}
+
+		if (pd.type == PD_LAZY &&
+		    (ctx->lazy_vm == 0 || (vma->vm_flags&VM_LOCKED)))
+			pd.type = PD_COPY;
+
+		if (!can_expand(&pa, &pd)) {
+			if (pa.type == PD_COPY ||
+			    pa.type == PD_ZERO) {
+				pgb.cpt_start = pa.start;
+				pgb.cpt_end = pa.end;
+				dump_page_block(vma, &pgb, pa.type, ctx);
+			} else if (pa.type == PD_CLONE) {
+				dump_copypage_block(vma, &pa, ctx);
+				cloned_pages++;
+			} else if (pa.type == PD_LAZY) {
+				dump_lazypage_block(vma, &pa, ctx);
+			} else if (pa.type == PD_ITER || pa.type == PD_ITERYOUNG) {
+				dump_iterpage_block(vma, &pa, ctx);
+				cloned_pages++;
+			} else if (pa.type == PD_ABSENT &&
+				   pa.pgoff != (pa.end - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff - 1) {
+				dump_remappage_block(vma, &pa, ctx);
+			}
+			pa.start = addr;
+		}
+		pa.type = pd.type;
+		pa.end = addr + PAGE_SIZE;
+		pa.pgoff = pd.index;
+		if (addr == pa.start)
+			pa.list[0] = pd.index;
+		pa.mm = pd.mm;
+	}
+
+	if (pa.end > pa.start) {
+		if (pa.type == PD_COPY ||
+		    pa.type == PD_ZERO) {
+			pgb.cpt_start = pa.start;
+			pgb.cpt_end = pa.end;
+			dump_page_block(vma, &pgb, pa.type, ctx);
+		} else if (pa.type == PD_CLONE) {
+			dump_copypage_block(vma, &pa, ctx);
+			cloned_pages++;
+		} else if (pa.type == PD_LAZY) {
+			dump_lazypage_block(vma, &pa, ctx);
+		} else if (pa.type == PD_ITER || pa.type == PD_ITERYOUNG) {
+			dump_iterpage_block(vma, &pa, ctx);
+			cloned_pages++;
+		} else if (pa.type == PD_ABSENT &&
+			   pa.pgoff != (pa.end - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff - 1) {
+			dump_remappage_block(vma, &pa, ctx);
+		}
+	}
+
+	if (cloned_pages) {
+		__u32 anonvma = 1;
+		loff_t anonpos = ctx->current_object + offsetof(struct cpt_vma_image, cpt_anonvma);
+		ctx->pwrite(&anonvma, 4, ctx, anonpos);
+	}
+
+out:
+	cpt_close_object(ctx);
+
+	cpt_pop_object(&saved_object, ctx);
+
+	return 0;
+}
+
+static int dump_one_aio_ctx(struct mm_struct *mm, struct kioctx *aio_ctx,
+			    cpt_context_t *ctx)
+{
+	loff_t saved_object;
+	struct cpt_aio_ctx_image aimg;
+
+	if (!list_empty(&aio_ctx->run_list)) {
+		/* This is impossible at least with kernel 2.6.8.1. */
+		eprintk_ctx("run list is not empty, cannot suspend AIO\n");
+		return -EBUSY;
+	}
+
+	/* Wait for pending IOCBs. Linux AIO is mostly _fake_.
+	 * It is actually synchronous, except for direct IO and
+	 * some funny raw USB things, which cannot happen inside VE.
+	 * However, we do this for future.
+	 */
+	wait_for_all_aios(aio_ctx);
+
+	if (!list_empty(&aio_ctx->run_list) ||
+	    !list_empty(&aio_ctx->active_reqs) ||
+	    aio_ctx->reqs_active) {
+		eprintk_ctx("were not able to suspend AIO\n");
+		return -EBUSY;
+	}
+
+	cpt_push_object(&saved_object, ctx);
+
+	aimg.cpt_next = CPT_ALIGN(sizeof(aimg));
+	aimg.cpt_object = CPT_OBJ_AIO_CONTEXT;
+	aimg.cpt_hdrlen = sizeof(aimg);
+	aimg.cpt_content = CPT_CONTENT_ARRAY;
+
+	aimg.cpt_max_reqs = aio_ctx->max_reqs;
+	aimg.cpt_ring_pages = aio_ctx->ring_info.nr_pages;
+	aimg.cpt_nr = aio_ctx->ring_info.nr;
+	aimg.cpt_tail = aio_ctx->ring_info.tail;
+	aimg.cpt_mmap_base = aio_ctx->ring_info.mmap_base;
+
+	ctx->write(&aimg, sizeof(aimg), ctx);
+
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+static int dump_one_mm(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct mm_struct *mm = obj->o_obj;
+	struct vm_area_struct *vma;
+	struct cpt_mm_image *v = cpt_get_buf(ctx);
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = -1;
+	v->cpt_object = CPT_OBJ_MM;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_start_code = mm->start_code;
+	v->cpt_end_code = mm->end_code;
+	v->cpt_start_data = mm->start_data;
+	v->cpt_end_data = mm->end_data;
+	v->cpt_start_brk = mm->start_brk;
+	v->cpt_brk = mm->brk;
+	v->cpt_start_stack = mm->start_stack;
+	v->cpt_start_arg = mm->arg_start;
+	v->cpt_end_arg = mm->arg_end;
+	v->cpt_start_env = mm->env_start;
+	v->cpt_end_env = mm->env_end;
+	v->cpt_def_flags = mm->def_flags;
+	v->cpt_mmub = cpt_lookup_ubc(mm_ub(mm), ctx);
+	v->cpt_dumpable = mm->dumpable;
+	v->cpt_vps_dumpable = mm->vps_dumpable;
+	v->cpt_used_hugetlb = 0;
+#ifdef CONFIG_HUGETLB_PAGE
+	v->cpt_used_hugetlb = mm->used_hugetlb;
+#endif
+	v->cpt_vdso = (__u32)(unsigned long)mm->context.vdso;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	if (mm->context.size) {
+		loff_t saved_object;
+		struct cpt_obj_bits b;
+		int size;
+#ifndef CONFIG_X86_64
+		int i;
+#endif
+
+		dprintk_ctx("nontrivial LDT\n");
+
+		cpt_push_object(&saved_object, ctx);
+
+		cpt_open_object(NULL, ctx);
+		b.cpt_next = CPT_NULL;
+		b.cpt_object = CPT_OBJ_BITS;
+		b.cpt_hdrlen = sizeof(b);
+		b.cpt_content = CPT_CONTENT_MM_CONTEXT;
+		b.cpt_size = mm->context.size*LDT_ENTRY_SIZE;
+
+		ctx->write(&b, sizeof(b), ctx);
+
+		size = mm->context.size*LDT_ENTRY_SIZE;
+
+#ifdef CONFIG_X86_64
+		ctx->write(mm->context.ldt, size, ctx);
+#else
+		for (i = 0; i < size; i += PAGE_SIZE) {
+			int nr = i / PAGE_SIZE, bytes;
+			char *kaddr = kmap(mm->context.ldt_pages[nr]);
+
+			bytes = size - i;
+			if (bytes > PAGE_SIZE)
+				bytes = PAGE_SIZE;
+			ctx->write(kaddr, bytes, ctx);
+			kunmap(mm->context.ldt_pages[nr]);
+		}
+#endif
+
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_object, ctx);
+	}
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		int err;
+
+		if ((err = dump_one_vma(obj, vma, ctx)) != 0)
+			return err;
+	}
+
+	if (mm->ioctx_list) {
+		struct kioctx *aio_ctx;
+		int err;
+
+		for (aio_ctx = mm->ioctx_list; aio_ctx; aio_ctx = aio_ctx->next)
+			if ((err = dump_one_aio_ctx(mm, aio_ctx, ctx)) != 0)
+				return err;
+	}
+
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+int cpt_dump_vm(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	scnt = scnt0 = zcnt = 0;
+
+	cpt_open_section(ctx, CPT_SECT_MM);
+
+	for_each_object(obj, CPT_OBJ_MM) {
+		int err;
+
+		if ((err = dump_one_mm(obj, ctx)) != 0)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+
+	if (scnt)
+		dprintk_ctx("cpt_dump_vm: %d shared private anon pages\n", scnt);
+	if (scnt0)
+		dprintk_ctx("cpt_dump_vm: %d anon pages are cloned\n", scnt0);
+	if (zcnt)
+		dprintk_ctx("cpt_dump_vm: %d silly pages canceled\n", zcnt);
+	return 0;
+}
+
+int scan_one_task(task_t *p)
+{
+	struct mm_struct *mm = get_task_mm(p);
+	unsigned long addr;
+	int young = 0;
+
+	if (unlikely(mm == NULL))
+		return 0;
+
+	spin_lock(&mm->page_table_lock);
+	for (addr = 0; addr < TASK_SIZE; addr += PGDIR_SIZE) {
+		pgd_t * pgd = pgd_offset(mm, addr);
+		unsigned long addr2;
+
+		if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+			continue;
+
+		for (addr2 = addr; addr2 < addr + PGDIR_SIZE; addr2 += PMD_SIZE) {
+			pmd_t * pmd = pmd_offset(pgd, addr2);
+			unsigned long addr3;
+
+			if (pmd_none(*pmd) || pmd_huge(*pmd) || unlikely(pmd_bad(*pmd)))
+				continue;
+
+			for (addr3 = addr2; addr3 < addr2 + PMD_SIZE; addr3 += PAGE_SIZE) {
+				pte_t *pte = pte_offset_map(pmd, addr3);
+				if (!pte)
+					continue;
+				if (pte_present(*pte) &&
+				    ptep_test_and_clear_young(pte))
+					young++;
+				pte_unmap(pte);
+			}
+		}
+	}
+	spin_unlock(&mm->page_table_lock);
+	mmput(mm);
+	return young;
+}
+
+int cpt_mm_prepare(unsigned long veid)
+{
+	task_t *p;
+	struct ve_struct *env;
+	int young_tot = 0;
+
+	env = get_ve_by_id(veid);
+	if (env == NULL)
+		return -ESRCH;
+
+	write_lock_irq(&tasklist_lock);
+
+	/* The idea is to take task from tail of list and relink it
+	 * to the head. Then we can release tasklist lock. Stop condition
+	 * is when this task is init_entry, which is supposed to stay
+	 * at the head of list in normal state.
+	 *
+	 * If someone else is messing with the list, we will miss some
+	 * tasks. If it is another cpt_mm_prepare(), the work will be
+	 * done anyway.
+	 */
+	do {
+		/* VE is empty, stop scanning. */
+		if (list_empty(&env->vetask_lh))
+			break;
+
+		p = VE_TASK_LIST_2_TASK(env->vetask_lh.prev);
+		REMOVE_VE_LINKS(p);
+		list_add(&VE_TASK_INFO(p)->vetask_list, &env->vetask_lh);
+
+		get_task_struct(p);
+		write_unlock_irq(&tasklist_lock);
+
+		young_tot += scan_one_task(p);
+
+		put_task_struct(p);
+
+		cond_resched();
+
+		write_lock_irq(&tasklist_lock);
+	} while (p != env->init_entry);
+
+	write_unlock_irq(&tasklist_lock);
+
+	put_ve(env);
+
+	return young_tot;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_mm.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_mm.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_mm.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_mm.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,40 @@
+int cpt_collect_mm(cpt_context_t *);
+
+int cpt_dump_vm(struct cpt_context *ctx);
+
+__u32 rst_mm_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_mm_basic(cpt_object_t *obj, struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_mm_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+
+int cpt_mm_prepare(unsigned long veid);
+
+int cpt_free_pgin_dir(struct cpt_context *);
+int cpt_start_pagein(struct cpt_context *);
+int rst_setup_pagein(struct cpt_context *);
+int rst_complete_pagein(struct cpt_context *, int);
+int rst_pageind(struct cpt_context *);
+int rst_swapoff(struct cpt_context *);
+int cpt_iteration(cpt_context_t *ctx);
+int rst_iteration(cpt_context_t *ctx);
+void rst_drop_iter_dir(cpt_context_t *ctx);
+int rst_iter(struct vm_area_struct *vma, u64 pfn,
+	     unsigned long addr, cpt_context_t * ctx);
+
+extern int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack,
+		unsigned long map_address);
+
+#ifdef CONFIG_X86_64
+extern char *syscall32_page;
+extern struct vm_operations_struct syscall32_vm_ops;
+#define vsyscall_addr syscall32_page
+#define vsyscall_vm_ops syscall32_vm_ops
+#define SYSEXIT_RETURN VSYSCALL32_SYSEXIT
+#else
+extern struct page *sysenter_page;
+extern struct vm_operations_struct special_mapping_vmops;
+#define vsyscall_addr page_address(sysenter_page)
+#define vsyscall_vm_ops special_mapping_vmops
+extern void SYSENTER_RETURN_OFFSET;
+#define SYSEXIT_RETURN (current->mm->context.vdso + \
+		(unsigned long)&SYSENTER_RETURN_OFFSET)
+#endif
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_net.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_net.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_net.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_net.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,359 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/ve.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
+#include <linux/nfcalls.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+#include "cpt_syscalls.h"
+
+int cpt_dump_link(struct cpt_context * ctx)
+{
+	struct net_device *dev;
+
+	cpt_open_section(ctx, CPT_SECT_NET_DEVICE);
+	for (dev = dev_base; dev; dev = dev->next) {
+		struct cpt_netdev_image v;
+
+		cpt_open_object(NULL, ctx);
+
+		v.cpt_next = CPT_NULL;
+		v.cpt_object = CPT_OBJ_NET_DEVICE;
+		v.cpt_hdrlen = sizeof(v);
+		v.cpt_content = CPT_CONTENT_VOID;
+
+		v.cpt_index = dev->ifindex;
+		v.cpt_flags = dev->flags;
+		memcpy(v.cpt_name, dev->name, IFNAMSIZ);
+		ctx->write(&v, sizeof(v), ctx);
+		cpt_close_object(ctx);
+
+		if (dev != get_exec_env()->_loopback_dev &&
+#if defined(CONFIG_VE_ETHDEV) || defined(CONFIG_VE_ETHDEV_MODULE)
+		    !(KSYMREF(veth_open) && dev->open == KSYMREF(veth_open)) &&
+#endif
+		    dev != get_exec_env()->_venet_dev) {
+			eprintk_ctx("unsupported netdevice %s\n", dev->name);
+			cpt_close_section(ctx);
+			return -EBUSY;
+		}
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+int cpt_suspend_network(struct cpt_context *ctx)
+{
+	get_exec_env()->disable_net = 1;
+	synchronize_net();
+	return 0;
+}
+
+int cpt_resume_network(struct cpt_context *ctx)
+{
+	struct ve_struct *env;
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+	env->disable_net = 0;
+	put_ve(env);
+	return 0;
+}
+
+int cpt_dump_ifaddr(struct cpt_context * ctx)
+{
+	struct net_device *dev;
+
+	cpt_open_section(ctx, CPT_SECT_NET_IFADDR);
+	for (dev = dev_base; dev; dev = dev->next) {
+		struct in_device *idev = in_dev_get(dev);
+		struct in_ifaddr *ifa;
+
+		if (!idev)
+			continue;
+
+		for (ifa = idev->ifa_list; ifa; ifa = ifa->ifa_next) {
+			struct cpt_ifaddr_image v;
+			cpt_open_object(NULL, ctx);
+
+			v.cpt_next = CPT_NULL;
+			v.cpt_object = CPT_OBJ_NET_IFADDR;
+			v.cpt_hdrlen = sizeof(v);
+			v.cpt_content = CPT_CONTENT_VOID;
+
+			v.cpt_index = dev->ifindex;
+			v.cpt_family = AF_INET;
+			v.cpt_masklen = ifa->ifa_prefixlen;
+			v.cpt_flags = ifa->ifa_flags;
+			v.cpt_scope = ifa->ifa_scope;
+			memset(&v.cpt_address, 0, sizeof(v.cpt_address));
+			memset(&v.cpt_peer, 0, sizeof(v.cpt_peer));
+			memset(&v.cpt_broadcast, 0, sizeof(v.cpt_broadcast));
+			v.cpt_address[0] = ifa->ifa_local;
+			v.cpt_peer[0] = ifa->ifa_address;
+			v.cpt_broadcast[0] = ifa->ifa_broadcast;
+			memcpy(v.cpt_label, ifa->ifa_label, IFNAMSIZ);
+			ctx->write(&v, sizeof(v), ctx);
+			cpt_close_object(ctx);
+		}
+		in_dev_put(idev);
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+static int cpt_dump_route(struct cpt_context * ctx)
+{
+	int err;
+	struct socket *sock;
+	struct msghdr msg;
+	struct iovec iov;
+	struct {
+		struct nlmsghdr nlh;
+		struct rtgenmsg g;
+	} req;
+	struct sockaddr_nl nladdr;
+	struct cpt_object_hdr v;
+	mm_segment_t oldfs;
+	char *pg;
+
+	err = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE, &sock);
+	if (err)
+		return err;
+
+	memset(&nladdr, 0, sizeof(nladdr));
+	nladdr.nl_family = AF_NETLINK;
+
+	req.nlh.nlmsg_len = sizeof(req);
+	req.nlh.nlmsg_type = RTM_GETROUTE;
+	req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
+	req.nlh.nlmsg_pid = 0;
+	req.g.rtgen_family = AF_INET;
+
+	iov.iov_base=&req;
+	iov.iov_len=sizeof(req);
+	msg.msg_name=&nladdr;
+	msg.msg_namelen=sizeof(nladdr);
+	msg.msg_iov=&iov;
+	msg.msg_iovlen=1;
+	msg.msg_control=NULL;
+	msg.msg_controllen=0;
+	msg.msg_flags=MSG_DONTWAIT;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	err = sock_sendmsg(sock, &msg, sizeof(req));
+	set_fs(oldfs);
+
+	if (err < 0)
+		goto out_sock;
+
+	pg = (char*)__get_free_page(GFP_KERNEL);
+	if (pg == NULL) {
+		err = -ENOMEM;
+		goto out_sock;
+	}
+
+	cpt_open_section(ctx, CPT_SECT_NET_ROUTE);
+	cpt_open_object(NULL, ctx);
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_ROUTE;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_NLMARRAY;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+
+	for (;;) {
+		struct nlmsghdr *h;
+
+		iov.iov_base = pg;
+		iov.iov_len = PAGE_SIZE;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT);
+		set_fs(oldfs);
+
+		if (err < 0)
+			goto out_sock_pg;
+		if (msg.msg_flags & MSG_TRUNC) {
+			err = -ENOBUFS;
+			goto out_sock_pg;
+		}
+
+		h = (struct nlmsghdr*)pg;
+		while (NLMSG_OK(h, err)) {
+			if (h->nlmsg_type == NLMSG_DONE) {
+				err = 0;
+				goto done;
+			}
+			if (h->nlmsg_type == NLMSG_ERROR) {
+				struct nlmsgerr *errm = (struct nlmsgerr*)NLMSG_DATA(h);
+				err = errm->error;
+				eprintk_ctx("NLMSG error: %d\n", errm->error);
+				goto done;
+			}
+			if (h->nlmsg_type != RTM_NEWROUTE) {
+				eprintk_ctx("NLMSG: %d\n", h->nlmsg_type);
+				err = -EINVAL;
+				goto done;
+			}
+			ctx->write(h, NLMSG_ALIGN(h->nlmsg_len), ctx);
+			h = NLMSG_NEXT(h, err);
+		}
+		if (err) {
+			eprintk_ctx("!!!Remnant of size %d %d %d\n", err, h->nlmsg_len, h->nlmsg_type);
+			err = -EINVAL;
+			break;
+		}
+	}
+done:
+	cpt_close_object(ctx);
+	cpt_close_section(ctx);
+
+out_sock_pg:
+	free_page((unsigned long)pg);
+out_sock:
+	sock_release(sock);
+	return err;
+}
+
+static int dumpfn(void *arg)
+{
+	int i;
+	int *pfd = arg;
+	char *argv[] = { "iptables-save", "-c", NULL };
+
+	i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
+	if (i < 0) {
+		eprintk("cannot enter ve to dump iptables\n");
+		module_put(THIS_MODULE);
+		return 1;
+	}
+
+	if (pfd[1] != 1)
+		sc_dup2(pfd[1], 1);
+
+	for (i=0; i<current->files->max_fds; i++) {
+		if (i != 1)
+			sc_close(i);
+	}
+
+	module_put(THIS_MODULE);
+
+	set_fs(KERNEL_DS);
+	i = sc_execve("/sbin/iptables-save", argv, NULL);
+	if (i == -ENOENT)
+		i = sc_execve("/usr/sbin/iptables-save", argv, NULL);
+	eprintk("failed to exec iptables-save: %d\n", i);
+	return -1;
+}
+
+
+static int cpt_dump_iptables(struct cpt_context * ctx)
+{
+	int err;
+	int pid;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	char buf[16];
+	loff_t pos;
+	int n;
+
+	err = sc_pipe(pfd);
+	if (err < 0) {
+		eprintk_ctx("sc_pipe: %d\n", err);
+		return err;
+	}
+	err = pid = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0);
+	if (err < 0) {
+		eprintk_ctx("local_kernel_thread: %d\n", err);
+		goto out;
+	}
+	f = fget(pfd[0]);
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	cpt_open_section(ctx, CPT_SECT_NET_IPTABLES);
+
+	cpt_open_object(NULL, ctx);
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NAME;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	pos = ctx->file->f_pos;
+	do {
+		mm_segment_t oldfs;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		n = f->f_op->read(f, buf, sizeof(buf), &f->f_pos);
+		set_fs(oldfs);
+		if (n > 0)
+			ctx->write(buf, n, ctx);
+	} while (n > 0);
+
+	if (n < 0)
+		eprintk_ctx("read: %d\n", n);
+
+	fput(f);
+
+	if ((err = sc_waitx(pid, 0)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+
+	if (ctx->file->f_pos != pos) {
+		buf[0] = 0;
+		ctx->write(buf, 1, ctx);
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_close_section(ctx);
+	} else {
+		pos = ctx->current_section;
+		cpt_close_object(ctx);
+		cpt_close_section(ctx);
+		ctx->sections[CPT_SECT_NET_IPTABLES] = CPT_NULL;
+		ctx->file->f_pos = pos;
+	}
+	return n;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	return err;
+}
+
+int cpt_dump_ifinfo(struct cpt_context * ctx)
+{
+	int err;
+
+	rtnl_lock();
+	err = cpt_dump_link(ctx);
+	if (!err)
+		err = cpt_dump_ifaddr(ctx);
+	rtnl_unlock();
+	if (!err)
+		err = cpt_dump_route(ctx);
+	if (!err)
+		err = cpt_dump_iptables(ctx);
+	return err;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_net.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_net.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_net.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_net.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,7 @@
+int cpt_dump_ifinfo(struct cpt_context *ctx);
+int rst_restore_net(struct cpt_context *ctx);
+int cpt_suspend_network(struct cpt_context *ctx);
+int cpt_resume_network(struct cpt_context *ctx);
+int rst_resume_network(struct cpt_context *ctx);
+int cpt_dump_ip_conntrack(struct cpt_context *ctx);
+int rst_restore_ip_conntrack(struct cpt_context * ctx);
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_obj.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_obj.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_obj.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_obj.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,161 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+cpt_object_t *alloc_cpt_object(int gfp, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = kmalloc(sizeof(cpt_object_t), gfp);
+	if (obj) {
+		INIT_LIST_HEAD(&obj->o_list);
+		INIT_LIST_HEAD(&obj->o_hash);
+		INIT_LIST_HEAD(&obj->o_alist);
+		obj->o_count = 1;
+		obj->o_pos = CPT_NULL;
+		obj->o_lock = 0;
+		obj->o_parent = NULL;
+		obj->o_index = CPT_NOINDEX;
+		obj->o_obj = NULL;
+		obj->o_image = NULL;
+		ctx->objcount++;
+	}
+	return obj;
+}
+EXPORT_SYMBOL(alloc_cpt_object);
+
+void free_cpt_object(cpt_object_t *obj, cpt_context_t *ctx)
+{
+	list_del(&obj->o_alist);
+	kfree(obj);
+	ctx->objcount--;
+}
+
+void intern_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, cpt_context_t *ctx)
+{
+	list_add_tail(&obj->o_list, &ctx->object_array[type]);
+}
+EXPORT_SYMBOL(intern_cpt_object);
+
+void insert_cpt_object(enum _cpt_object_type type, cpt_object_t *obj,
+			cpt_object_t *head, cpt_context_t *ctx)
+{
+	list_add(&obj->o_list, &head->o_list);
+}
+EXPORT_SYMBOL(insert_cpt_object);
+
+cpt_object_t * __cpt_object_add(enum _cpt_object_type type, void *p,
+		unsigned gfp_mask, cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = lookup_cpt_object(type, p, ctx);
+
+	if (obj) {
+		obj->o_count++;
+		return obj;
+	}
+
+	if ((obj = alloc_cpt_object(gfp_mask, ctx)) != NULL) {
+		if (p)
+			cpt_obj_setobj(obj, p, ctx);
+		intern_cpt_object(type, obj, ctx);
+		return obj;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(__cpt_object_add);
+
+cpt_object_t * cpt_object_add(enum _cpt_object_type type, void *p, cpt_context_t *ctx)
+{
+	return __cpt_object_add(type, p, GFP_KERNEL, ctx);
+}
+EXPORT_SYMBOL(cpt_object_add);
+
+cpt_object_t * cpt_object_get(enum _cpt_object_type type, void *p, cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = lookup_cpt_object(type, p, ctx);
+
+	if (obj)
+		obj->o_count++;
+
+	return obj;
+}
+EXPORT_SYMBOL(cpt_object_get);
+
+int cpt_object_init(cpt_context_t *ctx)
+{
+	int i;
+
+	for (i=0; i<CPT_OBJ_MAX; i++) {
+		INIT_LIST_HEAD(&ctx->object_array[i]);
+	}
+	return 0;
+}
+
+int cpt_object_destroy(cpt_context_t *ctx)
+{
+	int i;
+
+	for (i=0; i<CPT_OBJ_MAX; i++) {
+		while (!list_empty(&ctx->object_array[i])) {
+			struct list_head *head = ctx->object_array[i].next;
+			cpt_object_t *obj = list_entry(head, cpt_object_t, o_list);
+			list_del(head);
+			if (obj->o_image)
+				kfree(obj->o_image);
+			free_cpt_object(obj, ctx);
+		}
+	}
+	if (ctx->objcount != 0)
+		eprintk_ctx("BUG: ctx->objcount=%d\n", ctx->objcount);
+	return 0;
+}
+
+cpt_object_t *lookup_cpt_object(enum _cpt_object_type type, void *p, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, type) {
+		if (obj->o_obj == p)
+			return obj;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(lookup_cpt_object);
+
+cpt_object_t *lookup_cpt_obj_bypos(enum _cpt_object_type type, loff_t pos, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, type) {
+		if (obj->o_pos == pos)
+			return obj;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(lookup_cpt_obj_bypos);
+
+cpt_object_t *lookup_cpt_obj_byindex(enum _cpt_object_type type, __u32 index, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, type) {
+		if (obj->o_index == index)
+			return obj;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(lookup_cpt_obj_byindex);
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_obj.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_obj.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_obj.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_obj.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,62 @@
+#ifndef __CPT_OBJ_H_
+#define __CPT_OBJ_H_ 1
+
+#include <linux/list.h>
+#include <linux/cpt_image.h>
+
+typedef struct _cpt_object
+{
+	struct list_head	o_list;
+	struct list_head	o_hash;
+	int			o_count;
+	int			o_index;
+	int			o_lock;
+	loff_t			o_pos;
+	loff_t			o_ppos;
+	void			*o_obj;
+	void			*o_image;
+	void			*o_parent;
+	struct list_head	o_alist;
+} cpt_object_t;
+
+struct cpt_context;
+
+#define for_each_object(obj, type) list_for_each_entry(obj, &ctx->object_array[type], o_list)
+
+
+extern cpt_object_t *alloc_cpt_object(int gfp, struct cpt_context *ctx);
+extern void free_cpt_object(cpt_object_t *obj, struct cpt_context *ctx);
+
+cpt_object_t *lookup_cpt_object(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
+cpt_object_t *lookup_cpt_obj_bypos(enum _cpt_object_type type, loff_t pos, struct cpt_context *ctx);
+cpt_object_t *lookup_cpt_obj_byindex(enum _cpt_object_type type, __u32 index, struct cpt_context *ctx);
+
+static inline void cpt_obj_setpos(cpt_object_t *cpt, loff_t pos, struct cpt_context *ctx)
+{
+	cpt->o_pos = pos;
+	/* Add to pos hash table */
+}
+
+static inline void cpt_obj_setobj(cpt_object_t *cpt, void *ptr, struct cpt_context *ctx)
+{
+	cpt->o_obj = ptr;
+	/* Add to hash table */
+}
+
+static inline void cpt_obj_setindex(cpt_object_t *cpt, __u32 index, struct cpt_context *ctx)
+{
+	cpt->o_index = index;
+	/* Add to index hash table */
+}
+
+
+extern void intern_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, struct cpt_context *ctx);
+extern void insert_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, cpt_object_t *head, struct cpt_context *ctx);
+extern cpt_object_t *__cpt_object_add(enum _cpt_object_type type, void *p, unsigned int gfp_mask, struct cpt_context *ctx);
+extern cpt_object_t *cpt_object_add(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
+extern cpt_object_t *cpt_object_get(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
+
+extern int cpt_object_init(struct cpt_context *ctx);
+extern int cpt_object_destroy(struct cpt_context *ctx);
+
+#endif /* __CPT_OBJ_H_ */
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_pagein.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_pagein.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_pagein.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_pagein.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,231 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/uio.h>
+#include <asm/uaccess.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+#include "cpt_pagein.h"
+
+#include "cpt_syscalls.h"
+
+/* Requests up to this many pages use the on-stack iovec/page arrays;
+ * larger ones allocate dynamically. */
+#define MAX_PGVEC_INLINE	8
+/* Hard upper bound on pages per request (checked in do_req()). */
+#define MAX_PGVEC		1024
+
+/*
+ * Transmit one pgin_reply header followed by the contents of npg pages
+ * to 'file' as a single writev().  Each page is temporarily mapped with
+ * kmap() for the duration of the write.
+ * Returns 0 on success, -ENOMEM on iovec allocation failure, the
+ * writev() error, or -EIO on a short write.
+ * NOTE(review): after a short write the stream is left out of sync with
+ * the peer; caller has no way to resynchronize — confirm acceptable.
+ */
+static int send_reply(struct file *file, struct pgin_reply *repl,
+		      struct page **pgvec, int npg)
+{
+	int i;
+	int err;
+	struct iovec iov_buf[1 + MAX_PGVEC_INLINE];
+	struct iovec *iov = iov_buf;
+
+
+	/* Need one iovec slot per page plus one for the reply header. */
+	if (npg+1 > MAX_PGVEC_INLINE) {
+		iov = kmalloc((npg+1)*sizeof(struct iovec), GFP_KERNEL);
+		if (iov == NULL)
+			return -ENOMEM;
+	}
+
+	iov[0].iov_base = repl;
+	iov[0].iov_len = sizeof(*repl);
+	for (i=0; i<npg; i++) {
+		iov[i+1].iov_base = kmap(pgvec[i]);
+		iov[i+1].iov_len = PAGE_SIZE;
+	}
+	err = file->f_op->writev(file, iov, npg + 1, &file->f_pos);
+	for (i=0; i<npg; i++) {
+		kunmap(pgvec[i]);
+	}
+	if (iov != iov_buf)
+		kfree(iov);
+	if (err < 0)
+		return err;
+	if (err != sizeof(*repl) + PAGE_SIZE*npg)
+		return -EIO;
+	return 0;
+}
+
+/*
+ * Handle one pgin_request read from the pagein channel.
+ * Returns 1 for a STOP request (terminates the loop), 0 on success,
+ * or a negative error after sending an error reply where possible.
+ * For a normal request: pin 'size' pages described by pgin_dir starting
+ * at descriptor 'index' via get_user_pages(), ship them with
+ * send_reply(), then release the page references.
+ */
+static int do_req(struct pgin_request *req, cpt_context_t *ctx)
+{
+	int err = -EIO;
+	struct pgin_reply repl;
+	struct page *pgvec_buf[MAX_PGVEC_INLINE];
+	struct page **pgvec = pgvec_buf;
+	u32 idx;
+	int npg = 0;
+
+	repl.rmid = PGIN_RMID;
+	repl.handle = req->handle;
+
+	/* Every request must carry the record mark. */
+	if (req->rmid != PGIN_RMID) {
+		eprintk_ctx("pgoutd: bad record mark %08x\n", req->rmid);
+		return -EFAULT;
+	}
+
+	if (req->size == PGIN_STOP)
+		return 1;
+
+	/* Handshake: acknowledge with an empty success reply. */
+	if (req->size == PGIN_START) {
+		repl.error = 0;
+		return send_reply(ctx->pagein_file_out, &repl, NULL, 0);
+	}
+
+//	dprintk_ctx("pgoutd: idx=%u %u\n", (u32)req->index, req->size);
+
+	if (req->size > MAX_PGVEC) {
+		eprintk_ctx("pgoutd: too long\n");
+		goto eio;
+	}
+
+	if (req->size > MAX_PGVEC_INLINE) {
+		pgvec = kmalloc(req->size*sizeof(struct page *), GFP_KERNEL);
+		if (!pgvec) {
+			eprintk_ctx("pgoutd: cannot allocate pgvec\n");
+			goto eio;
+		}
+	}
+
+	/* Requested range must stay within the recorded lazy pages. */
+	if (req->index + req->size > ctx->lazypages) {
+		eprintk_ctx("%u %u %d\n", (u32)req->index, req->size, ctx->lazypages);
+		goto eio;
+	}
+
+	for (idx = req->index; idx < req->index + req->size; idx++) {
+		struct pagein_desc *pd = ctx->pgin_dir[idx/IDX_PER_PAGE];
+		pd += idx%IDX_PER_PAGE;
+
+		/* Pin one page at user address pd->offset in pd->mm. */
+		err = get_user_pages(current, pd->mm, pd->offset,
+				     1, 0, 1, pgvec+npg, NULL);
+		if (err <= 0) {
+			eprintk_ctx("%d idx=%d %p %08lx\n", err, idx, pd->mm, pd->offset);
+			goto eio;
+		}
+		npg++;
+	}
+	repl.error = 0;
+	err = send_reply(ctx->pagein_file_out, &repl, pgvec, npg);
+	while (--npg >= 0)
+		page_cache_release(pgvec[npg]);
+	if (pgvec != pgvec_buf)
+		kfree(pgvec);
+	return err;
+
+eio:
+	eprintk_ctx("pgoutd: EIO\n");
+	while (--npg >= 0)
+		page_cache_release(pgvec[npg]);
+	if (pgvec && pgvec != pgvec_buf)
+		kfree(pgvec);
+	/* NOTE(review): get_user_pages() may return 0, so err can be 0
+	 * here and the error reply would claim success — confirm whether
+	 * it should be forced to a negative value. */
+	repl.error = err;
+	return send_reply(ctx->pagein_file_out, &repl, NULL, 0);
+}
+
+/*
+ * Main body of the "pgoutd" kernel thread.  Repeatedly reads complete
+ * fixed-size pgin_request records from ctx->pagein_file_in (looping on
+ * short reads) and dispatches them to do_req() until a STOP request
+ * (do_req() == 1) or a read error.  Signals ctx->pgin_notify on exit.
+ */
+static int pgin_loop(void *arg)
+{
+	cpt_context_t *ctx = (cpt_context_t *)arg;
+	char buf[sizeof(struct pgin_request)];
+	struct file * f = ctx->pagein_file_in;
+
+	if (f == NULL) {
+		eprintk_ctx("null pagein_file_in\n");
+		goto out;
+	}
+
+	/* Detach from user context; KERNEL_DS lets vfs_read use a
+	 * kernel buffer. */
+	daemonize("pgoutd");
+	allow_signal(SIGKILL);
+	set_fs(KERNEL_DS);
+
+	for (;;) {
+		int err;
+		int bytes = 0;
+
+		/* Accumulate a full request record across short reads. */
+		do {
+			int n = vfs_read(f, buf + bytes, sizeof(buf) - bytes, &f->f_pos);
+			if (n <= 0) {
+				eprintk_ctx("pagein_file_in %d\n", n);
+				goto out;
+			}
+			bytes += n;
+		} while (bytes < sizeof(buf));
+
+		err = do_req((struct pgin_request*)buf, ctx);
+		if (err == 1) {
+			dprintk_ctx("pagein_file_in do_req %d\n", err);
+			break;
+		}
+	}
+
+out:
+	complete(&ctx->pgin_notify);
+	return 0;
+}
+
+/* Workqueue callback: spawn the pgin_loop kernel thread on behalf of
+ * cpt_start_pagein() (so the thread is not a child of the caller),
+ * store the resulting pid and signal completion. */
+static void cpt_start_pagein_worker(void *_info)
+{
+	pagein_info_t *info;
+	
+	info = (pagein_info_t *)_info;
+	info->pid = kernel_thread(pgin_loop, info->ctx,
+			CLONE_KERNEL | CLONE_VM | SIGCHLD);
+	complete(&info->done);
+}
+
+/*
+ * Launch the pagein worker thread for a context that has lazy pages.
+ * Validates the in/out channel files (out must support writev), spawns
+ * the thread via schedule_work() and pins its task_struct in
+ * ctx->pgin_task.  Returns 0 on success or a negative error.
+ */
+int cpt_start_pagein(cpt_context_t *ctx)
+{
+	pagein_info_t create;
+	DECLARE_WORK(work, cpt_start_pagein_worker, &create);
+
+	/* Nothing to do if no lazy pages were collected. */
+	if (ctx->lazypages == 0)
+		return 0;
+
+	if (ctx->pagein_file_in == NULL ||
+	    ctx->pagein_file_out == NULL ||
+	    ctx->pagein_file_out->f_op->writev == NULL)
+		return -EBADF;
+
+	create.pid = -EINVAL;
+	create.ctx = ctx;
+	init_completion(&create.done);
+	schedule_work(&work);
+	wait_for_completion(&create.done);
+
+	if (create.pid < 0)
+		return create.pid;
+
+	/* Convert the pid into a pinned task_struct reference. */
+	read_lock(&tasklist_lock);
+	ctx->pgin_task = find_task_by_pid_all(create.pid);
+	if (ctx->pgin_task)
+		get_task_struct(ctx->pgin_task);
+	read_unlock(&tasklist_lock);
+	if (ctx->pgin_task == NULL) {
+		eprintk_ctx("pgin task is lost\n");
+		return -ESRCH;
+	}
+	return 0;
+}
+
+/* Free the pagein descriptor directory: each populated directory slot
+ * holds one page of pagein_desc entries; the slot array itself was
+ * vmalloc'ed.  Caller guarantees ctx->pgin_dir != NULL (see
+ * cpt_context_release()). */
+void cpt_free_pgin_dir(cpt_context_t *ctx)
+{
+	int i;
+
+	for (i=0; i<PGINDIR_SIZE/sizeof(struct pagein_desc*); i++) {
+		unsigned long pg = (unsigned long)ctx->pgin_dir[i];
+		if (pg)
+			free_page(pg);
+	}
+
+	vfree((void *)ctx->pgin_dir);
+	ctx->pgin_dir = NULL;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_pagein.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_pagein.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_pagein.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_pagein.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,31 @@
+
+/* Record mark that must open every request/reply on the pagein channel. */
+#define PGIN_RMID	0xF1AD1966
+/* Magic 'size' values: session handshake and termination request. */
+#define PGIN_START	0xFFFFFFFF
+#define PGIN_STOP	0xFFFFFFFE
+
+#define ITER_PASS	0x1
+#define ITER_STOP	0x2
+
+/* Request received by the pgin worker: send 'size' pages starting at
+ * descriptor 'index'; 'handle' is echoed back in the reply. */
+struct pgin_request
+{
+	__u32	rmid;
+	__u32	size;
+	__u64	index;
+	__u64	handle;
+};
+
+/* Reply header; page payload (if any) follows in the same writev(). */
+struct pgin_reply
+{
+	__u32	rmid;
+	__u32	error;
+	__u64	handle;
+};
+
+/* One lazy page: owning mm and the user-space address it lives at
+ * (passed as the start address to get_user_pages()). */
+struct pagein_desc
+{
+	struct mm_struct	*mm;
+	unsigned long		offset;
+};
+
+/* pgin_dir is an array of page pointers, each page holding
+ * IDX_PER_PAGE pagein_desc entries. */
+#define IDX_PER_PAGE (PAGE_SIZE/sizeof(struct pagein_desc))
+#define PGINDIR_SIZE (16*PAGE_SIZE)
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_proc.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_proc.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_proc.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_proc.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,579 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_ioctl.h>
+#include <linux/delay.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_dump.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+
+MODULE_AUTHOR("Alexey Kuznetsov <alexey@sw.ru>");
+MODULE_LICENSE("GPL v2");
+
+/* List of contexts and the lock protecting the list.  The same lock
+ * also serializes refcount changes and file->private_data updates. */
+struct list_head cpt_context_list;
+spinlock_t cpt_context_lock;
+
+/*
+ * Legacy /proc read_proc handler: dump one line per context (pointer,
+ * contextid, VE id, state).  Uses the classic pos/begin bookkeeping to
+ * honour the caller's offset/length window.
+ */
+static int proc_read(char *buffer, char **start, off_t offset,
+		     int length, int *eof, void *data)
+{
+	off_t pos = 0;
+	off_t begin = 0;
+	int len = 0;
+	cpt_context_t *ctx;
+
+	len += sprintf(buffer, "Ctx      Id       VE       State\n");
+
+	spin_lock(&cpt_context_lock);
+
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		len += sprintf(buffer+len,"%p %08x %-8u %d",
+			       ctx,
+			       ctx->contextid,
+			       ctx->ve_id,
+			       ctx->ctx_state
+			       );
+
+		buffer[len++] = '\n';
+
+		/* Discard output that lies before the requested offset. */
+		pos = begin+len;
+		if (pos < offset) {
+			len = 0;
+			begin = pos;
+		}
+		/* Stop once the window is filled; EOF is not set then. */
+		if (pos > offset+length)
+			goto done;
+	}
+	*eof = 1;
+
+done:
+	spin_unlock(&cpt_context_lock);
+	*start = buffer + (offset - begin);
+	len -= (offset - begin);
+	if(len > length)
+		len = length;
+	if(len < 0)
+		len = 0;
+	return len;
+}
+
+/*
+ * Final teardown of a context whose refcount reached zero.
+ * Called with cpt_context_lock held; the lock is dropped for the
+ * duration of the (possibly sleeping) cleanup and re-taken before
+ * returning, so the caller's lock/unlock pairing stays balanced.
+ */
+void cpt_context_release(cpt_context_t *ctx)
+{
+	list_del(&ctx->ctx_list);
+	spin_unlock(&cpt_context_lock);
+
+	/* A positive state means the VE is still suspended: resume it
+	 * before throwing the context away. */
+	if (ctx->ctx_state > 0)
+		cpt_resume(ctx);
+	ctx->ctx_state = CPT_CTX_ERROR;
+
+	if (ctx->pgin_task)
+		put_task_struct(ctx->pgin_task);
+	if (ctx->pgin_dir)
+		cpt_free_pgin_dir(ctx);
+	if (ctx->objcount)
+		eprintk_ctx("%d objects leaked\n", ctx->objcount);
+	if (ctx->file)
+		fput(ctx->file);
+	/* Flush pending error text before dropping the error file. */
+	cpt_flush_error(ctx);
+	if (ctx->errorfile) {
+		fput(ctx->errorfile);
+		ctx->errorfile = NULL;
+	}
+	if (ctx->error_msg) {
+		free_page((unsigned long)ctx->error_msg);
+		ctx->error_msg = NULL;
+	}
+	if (ctx->statusfile)
+		fput(ctx->statusfile);
+	if (ctx->lockfile)
+		fput(ctx->lockfile);
+	if (ctx->pagein_file_out)
+		fput(ctx->pagein_file_out);
+	if (ctx->pagein_file_in)
+		fput(ctx->pagein_file_in);
+	kfree(ctx);
+
+	spin_lock(&cpt_context_lock);
+}
+
+/* Drop one reference; destroys the context at zero.
+ * Must be called with cpt_context_lock held. */
+static void __cpt_context_put(cpt_context_t *ctx)
+{
+	if (!--ctx->refcount)
+		cpt_context_release(ctx);
+}
+
+/* Locked wrapper around __cpt_context_put(). */
+static void cpt_context_put(cpt_context_t *ctx)
+{
+	spin_lock(&cpt_context_lock);
+	__cpt_context_put(ctx);
+	spin_unlock(&cpt_context_lock);
+}
+
+/*
+ * Allocate and initialize a fresh context and link it onto the global
+ * list.  Returns NULL on allocation failure.
+ * NOTE(review): the context becomes visible on the list before
+ * error_msg is allocated; allocation failure of the error page is
+ * tolerated (error_msg stays NULL).
+ */
+cpt_context_t * cpt_context_open(void)
+{
+	cpt_context_t *ctx;
+
+	if ((ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)) != NULL) {
+		cpt_context_init(ctx);
+		spin_lock(&cpt_context_lock);
+		list_add_tail(&ctx->ctx_list, &cpt_context_list);
+		spin_unlock(&cpt_context_lock);
+		ctx->error_msg = (char*)__get_free_page(GFP_KERNEL);
+		if (ctx->error_msg != NULL)
+			ctx->error_msg[0] = 0;
+	}
+	return ctx;
+}
+
+/* Find a context by its id; on success a reference is taken and the
+ * caller must release it with cpt_context_put().  NULL if not found. */
+cpt_context_t * cpt_context_lookup(unsigned int contextid)
+{
+	cpt_context_t *ctx;
+
+	spin_lock(&cpt_context_lock);
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		if (ctx->contextid == contextid) {
+			ctx->refcount++;
+			spin_unlock(&cpt_context_lock);
+			return ctx;
+		}
+	}
+	spin_unlock(&cpt_context_lock);
+	return NULL;
+}
+
+/* Return 1 if some context with an active (positive) state already
+ * exists for this VE id, 0 otherwise.  No reference is taken. */
+int cpt_context_lookup_veid(unsigned int veid)
+{
+	cpt_context_t *ctx;
+
+	spin_lock(&cpt_context_lock);
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		if (ctx->ve_id == veid && ctx->ctx_state > 0) {
+			spin_unlock(&cpt_context_lock);
+			return 1;
+		}
+	}
+	spin_unlock(&cpt_context_lock);
+	return 0;
+}
+
+/*
+ * Main control entry point for the /proc/cpt device.  Dispatches all
+ * checkpoint commands.  A cpt_context is attached to the open file via
+ * file->private_data and reference-counted under cpt_context_lock; the
+ * BKL is dropped for the duration of the call and re-taken on exit.
+ *
+ * NOTE(review): 'arg' is unsigned long, so every "if (arg >= 0)" below
+ * is always true.  If passing -1 to the SET_*FD commands was meant to
+ * clear the fd, it instead goes through fget() and fails — confirm.
+ */
+static int cpt_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	cpt_context_t *ctx;
+	struct file *dfile = NULL;
+	int try;
+
+	unlock_kernel();
+
+	/* Commands that need no context at all. */
+	if (cmd == CPT_VMPREP) {
+		err = cpt_mm_prepare(arg);
+		goto out_lock;
+	}
+
+	if (cmd == CPT_TEST_CAPS) {
+		unsigned int src_flags, dst_flags = arg;
+
+		err = 0;
+		src_flags = test_cpu_caps_and_features();
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_SSE2, "sse2", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_MMX, "mmx", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_3DNOW, "3dnow", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_3DNOW2, "3dnowext", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_SEP, "sysenter", err);
+		goto out_lock;
+	}
+
+	/* Detach the file from its context; JOIN additionally attaches
+	 * an existing context found by id. */
+	if (cmd == CPT_JOIN_CONTEXT || cmd == CPT_PUT_CONTEXT) {
+		cpt_context_t *old_ctx;
+
+		ctx = NULL;
+		if (cmd == CPT_JOIN_CONTEXT) {
+			err = -ENOENT;
+			ctx = cpt_context_lookup(arg);
+			if (!ctx)
+				goto out_lock;
+		}
+
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		file->private_data = ctx;
+
+		if (old_ctx) {
+			/* PUT also drops the extra "sticky" reference
+			 * taken by CPT_GET_CONTEXT. */
+			if (cmd == CPT_PUT_CONTEXT && old_ctx->sticky) {
+				old_ctx->sticky = 0;
+				old_ctx->refcount--;
+			}
+			__cpt_context_put(old_ctx);
+		}
+		spin_unlock(&cpt_context_lock);
+		err = 0;
+		goto out_lock;
+	}
+
+	/* Take a reference on the context attached to this file, if any. */
+	spin_lock(&cpt_context_lock);
+	ctx = (cpt_context_t*)file->private_data;
+	if (ctx)
+		ctx->refcount++;
+	spin_unlock(&cpt_context_lock);
+
+	/* No context yet: create one and attach it, racing other callers
+	 * on the same file; the loser's fresh context is dropped. */
+	if (!ctx) {
+		cpt_context_t *old_ctx;
+
+		err = -ENOMEM;
+		ctx = cpt_context_open();
+		if (!ctx)
+			goto out_lock;
+
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		if (!old_ctx) {
+			ctx->refcount++;
+			file->private_data = ctx;
+		} else {
+			old_ctx->refcount++;
+		}
+		if (old_ctx) {
+			__cpt_context_put(ctx);
+			ctx = old_ctx;
+		}
+		spin_unlock(&cpt_context_lock);
+	}
+
+	/* Assign a user-visible id and pin the context ("sticky") so it
+	 * survives the file being closed. */
+	if (cmd == CPT_GET_CONTEXT) {
+		unsigned int contextid = (unsigned int)arg;
+
+		if (ctx->contextid && ctx->contextid != contextid) {
+			err = -EINVAL;
+			goto out_nosem;
+		}
+		if (!ctx->contextid) {
+			cpt_context_t *c1 = cpt_context_lookup(contextid);
+			if (c1) {
+				cpt_context_put(c1);
+				err = -EEXIST;
+				goto out_nosem;
+			}
+			ctx->contextid = contextid;
+		}
+		spin_lock(&cpt_context_lock);
+		if (!ctx->sticky) {
+			ctx->sticky = 1;
+			ctx->refcount++;
+		}
+		spin_unlock(&cpt_context_lock);
+		goto out_nosem;
+	}
+
+	/* Serialize all state-changing commands on this context. */
+	down(&ctx->main_sem);
+
+	err = -EBUSY;
+	if (ctx->ctx_state < 0)
+		goto out;
+
+	err = 0;
+	switch (cmd) {
+	case CPT_SET_DUMPFD:
+		if (ctx->ctx_state == CPT_CTX_DUMPING) {
+			err = -EBUSY;
+			break;
+		}
+		if (arg >= 0) {
+			err = -EBADF;
+			dfile = fget(arg);
+			if (dfile == NULL)
+				break;
+			/* The dump target must be writable. */
+			if (dfile->f_op == NULL ||
+			    dfile->f_op->write == NULL) {
+				fput(dfile);
+				break;
+			}
+			err = 0;
+		}
+		if (ctx->file)
+			fput(ctx->file);
+		ctx->file = dfile;
+		break;
+	case CPT_SET_ERRORFD:
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->errorfile)
+			fput(ctx->errorfile);
+		ctx->errorfile = dfile;
+		break;
+	case CPT_SET_PAGEINFDIN:
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_in)
+			fput(ctx->pagein_file_in);
+		ctx->pagein_file_in = dfile;
+		break;
+	case CPT_SET_PAGEINFDOUT:
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_out)
+			fput(ctx->pagein_file_out);
+		ctx->pagein_file_out = dfile;
+		break;
+	case CPT_SET_LAZY:
+		ctx->lazy_vm = arg;
+		break;
+	case CPT_ITER:
+		err = cpt_iteration(ctx);
+		break;
+	case CPT_SET_VEID:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->ve_id = arg;
+		break;
+	case CPT_SET_CPU_FLAGS:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->dst_cpu_flags = arg;
+		ctx->src_cpu_flags = test_cpu_caps_and_features();
+		break;
+	case CPT_SUSPEND:
+		if (cpt_context_lookup_veid(ctx->ve_id) ||
+		    ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		/* Retry a transiently failing suspend up to 3 times,
+		 * resuming the VE between attempts. */
+		ctx->ctx_state = CPT_CTX_SUSPENDING;
+		try = 0;
+		do {
+			err = cpt_vps_suspend(ctx);
+			if (err)
+				cpt_resume(ctx);
+			if (err == -EAGAIN)
+				msleep(1000);
+			try++;
+		} while (err == -EAGAIN && try < 3);
+		if (err) {
+			ctx->ctx_state = CPT_CTX_IDLE;
+		} else {
+			ctx->ctx_state = CPT_CTX_SUSPENDED;
+		}
+		break;
+	case CPT_DUMP:
+		if (!ctx->ctx_state) {
+			err = -ENOENT;
+			break;
+		}
+		err = cpt_dump(ctx);
+		break;
+	case CPT_PAGEIND:
+		err = cpt_start_pagein(ctx);
+		break;
+	case CPT_RESUME:
+		if (ctx->ctx_state == CPT_CTX_IDLE) {
+			err = -ENOENT;
+			break;
+		}
+		err = cpt_resume(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	case CPT_KILL:
+		if (ctx->ctx_state == CPT_CTX_IDLE) {
+			err = -ENOENT;
+			break;
+		}
+		err = cpt_kill(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	case CPT_TEST_VECAPS:
+	{
+		/* Compare the VE's required CPU features against the
+		 * destination's; err becomes 1 on any mismatch. */
+		__u32 dst_flags = arg;
+		__u32 src_flags;
+
+		err = cpt_vps_caps(ctx, &src_flags);
+		if (err)
+			break;
+
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE2, "sse2", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_MMX, "mmx", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW, "3dnow", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW2, "3dnowext", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SEP, "sysenter", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_EMT64, "emt64", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_IA64, "ia64", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SYSCALL, "syscall", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SYSCALL32, "syscall32", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SEP32, "sysenter32", err);
+		if (dst_flags & (1 << CPT_SLM_DMPRST)) {
+			eprintk_ctx("SLM is enabled on destination node, but slm_dmprst module is not loaded\n");
+			err = 1;
+		}
+
+		if (src_flags & CPT_UNSUPPORTED_MASK)
+			err = 1;
+		break;
+	}
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+out:
+	cpt_flush_error(ctx);
+	up(&ctx->main_sem);
+out_nosem:
+	cpt_context_put(ctx);
+out_lock:
+	lock_kernel();
+	return err;
+}
+
+/* Open handler: just pin the module while the file is open. */
+static int cpt_open(struct inode *inode, struct file *file)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -EBUSY;
+
+	return 0;
+}
+
+/* Release handler: drop the file's context reference (a sticky context
+ * survives via its extra reference) and unpin the module. */
+static int cpt_release(struct inode * inode, struct file * file)
+{
+	cpt_context_t *ctx;
+
+	spin_lock(&cpt_context_lock);
+	ctx = (cpt_context_t*)file->private_data;
+	file->private_data = NULL;
+
+	if (ctx)
+		__cpt_context_put(ctx);
+	spin_unlock(&cpt_context_lock);
+
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+
+/* /proc/cpt file operations; read/write/llseek are copied from the
+ * original proc fops in init_cpt(), ioctl drives checkpointing. */
+static struct file_operations cpt_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = cpt_open,
+	.release = cpt_release,
+	.ioctl	 = cpt_ioctl,
+};
+
+static struct proc_dir_entry *proc_ent;
+
+/* Verbosity knob, exported as sysctl debug.cpt. */
+int debug_level = 1;
+
+static struct ctl_table_header *ctl_header;
+
+/* sysctl debug.cpt -> debug_level */
+static ctl_table debug_table[] = {
+	{
+		.ctl_name	= 9475,
+		.procname	= "cpt",
+		.data		= &debug_level,
+		.maxlen		= sizeof(debug_level),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+static ctl_table root_table[] = {
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+/*
+ * Module init: register the debug sysctl, then create /proc/cpt and
+ * splice our fops over the proc-generated ones (keeping the generic
+ * read/write/llseek so read_proc keeps working).
+ */
+static int __init init_cpt(void)
+{
+	int err;
+
+	err = -ENOMEM;
+	ctl_header = register_sysctl_table(root_table, 0);
+	if (!ctl_header)
+		goto err;
+
+	spin_lock_init(&cpt_context_lock);
+	INIT_LIST_HEAD(&cpt_context_list);
+
+	err = -EINVAL;
+	proc_ent = create_proc_entry_mod("cpt", 0600, NULL, THIS_MODULE);
+	if (!proc_ent)
+		goto err_out;
+
+	/* Inherit the default proc handlers, then install our fops. */
+	cpt_fops.read = proc_ent->proc_fops->read;
+	cpt_fops.write = proc_ent->proc_fops->write;
+	cpt_fops.llseek = proc_ent->proc_fops->llseek;
+	proc_ent->proc_fops = &cpt_fops;
+
+	proc_ent->read_proc = proc_read;
+	proc_ent->data = NULL;
+	proc_ent->owner = THIS_MODULE;
+	return 0;
+
+err_out:
+	unregister_sysctl_table(ctl_header);
+err:
+	return err;
+}
+module_init(init_cpt);
+
+/*
+ * Module exit: remove the proc entry and sysctl, then force-release
+ * every remaining context.  The BUG_ON documents the invariant that at
+ * unload time each context may hold at most its sticky reference.
+ */
+static void __exit exit_cpt(void)
+{
+	remove_proc_entry("cpt", NULL);
+	unregister_sysctl_table(ctl_header);
+
+	spin_lock(&cpt_context_lock);
+	while (!list_empty(&cpt_context_list)) {
+		cpt_context_t *ctx;
+		ctx = list_entry(cpt_context_list.next, cpt_context_t, ctx_list);
+
+		/* Normalize to exactly one reference before the final put. */
+		if (!ctx->sticky)
+			ctx->refcount++;
+		ctx->sticky = 0;
+
+		BUG_ON(ctx->refcount != 1);
+
+		__cpt_context_put(ctx);
+	}
+	spin_unlock(&cpt_context_lock);
+}
+module_exit(exit_cpt);
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_process.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_process.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_process.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_process.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,945 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_ubc.h"
+#include "cpt_process.h"
+#include "cpt_kernel.h"
+
+#ifdef CONFIG_X86_64
+/* User-mode pt_regs live at the top of the kernel stack on x86_64. */
+#define task_pt_regs(t) (((struct pt_regs *) (THREAD_SIZE + (unsigned long) (t)->thread_info)) - 1)
+#endif
+
+/*
+ * Refuse to checkpoint a 64-bit (non-IA32) task whose user rip is
+ * currently inside the vsyscall page, since that state cannot be
+ * restored reliably.  Returns -EAGAIN so the caller retries the
+ * suspend later, 0 otherwise.
+ */
+int check_task_state(task_t *tsk, struct cpt_context *ctx)
+{
+#ifdef CONFIG_X86_64
+	if (!(tsk->thread_info->flags&_TIF_IA32)) {
+		if (task_pt_regs(tsk)->rip >= VSYSCALL_START &&
+				task_pt_regs(tsk)->rip < VSYSCALL_END) {
+			/* Fixed typo in message: "checkpointied" -> "checkpointed". */
+			eprintk_ctx(CPT_FID "cannot be checkpointed while vsyscall, try later\n", CPT_TID(tsk));
+			return -EAGAIN;
+		}
+	}
+#endif
+	return 0;
+}
+
+/*
+ * Translate a raw x86 segment selector into an architecture-neutral
+ * CPT_SEG_* code for the image: zero, LDT slot, TLS slot, or one of
+ * the standard user code/data descriptors.  Unknown or privileged
+ * selectors are warned about and mapped to CPT_SEG_ZERO.
+ */
+static u32 encode_segment(u32 segreg)
+{
+	segreg &= 0xFFFF;
+
+	if (segreg == 0)
+		return CPT_SEG_ZERO;
+	/* Only ring-3 selectors are expected in user state. */
+	if ((segreg & 3) != 3) {
+		wprintk("Invalid RPL of a segment reg %x\n", segreg);
+		return CPT_SEG_ZERO;
+	}
+
+	/* LDT descriptor, it is just an index to LDT array */
+	if (segreg & 4)
+		return CPT_SEG_LDT + (segreg >> 3);
+
+	/* TLS descriptor. */
+	if ((segreg >> 3) >= GDT_ENTRY_TLS_MIN &&
+	    (segreg >> 3) <= GDT_ENTRY_TLS_MAX)
+		return CPT_SEG_TLS1 + ((segreg>>3) - GDT_ENTRY_TLS_MIN);
+
+	/* One of standard desriptors */
+#ifdef CONFIG_X86_64
+	if (segreg == __USER32_DS)
+		return CPT_SEG_USER32_DS;
+	if (segreg == __USER32_CS)
+		return CPT_SEG_USER32_CS;
+	if (segreg == __USER_DS)
+		return CPT_SEG_USER64_DS;
+	if (segreg == __USER_CS)
+		return CPT_SEG_USER64_CS;
+#else
+	if (segreg == __USER_DS)
+		return CPT_SEG_USER32_DS;
+	if (segreg == __USER_CS)
+		return CPT_SEG_USER32_CS;
+#endif
+	wprintk("Invalid segment reg %x\n", segreg);
+	return CPT_SEG_ZERO;
+}
+
+#ifdef CONFIG_X86_64
+/* Convert a 64-bit pt_regs of an IA32 task into the 32-bit register
+ * image layout; segment selectors are encoded via encode_segment().
+ * ds/es come from the thread struct, not pt_regs, on x86_64. */
+static void xlate_ptregs_64_to_32(struct cpt_x86_regs *d, struct pt_regs *s, task_t *tsk)
+{
+	d->cpt_ebp = s->rbp;
+	d->cpt_ebx = s->rbx;
+	d->cpt_eax = s->rax;
+	d->cpt_ecx = s->rcx;
+	d->cpt_edx = s->rdx;
+	d->cpt_esi = s->rsi;
+	d->cpt_edi = s->rdi;
+	d->cpt_orig_eax = s->orig_rax;
+	d->cpt_eip = s->rip;
+	d->cpt_xcs = encode_segment(s->cs);
+	d->cpt_eflags = s->eflags;
+	d->cpt_esp = s->rsp;
+	d->cpt_xss = encode_segment(s->ss);
+	d->cpt_xds = encode_segment(tsk->thread.ds);
+	d->cpt_xes = encode_segment(tsk->thread.es);
+}
+
+/*
+ * x86_64 variant: write the task's user register state into the image.
+ * IA32 tasks are written as a 32-bit CPT_OBJ_X86_REGS record (via
+ * xlate_ptregs_64_to_32), native tasks as CPT_OBJ_X86_64_REGS.
+ */
+static int dump_registers(task_t *tsk, struct cpt_context *ctx)
+{
+	cpt_open_object(NULL, ctx);
+
+	if (tsk->thread_info->flags&_TIF_IA32) {
+		struct cpt_x86_regs ri;
+		ri.cpt_next = sizeof(ri);
+		ri.cpt_object = CPT_OBJ_X86_REGS;
+		ri.cpt_hdrlen = sizeof(ri);
+		ri.cpt_content = CPT_CONTENT_VOID;
+
+		ri.cpt_debugreg[0] = tsk->thread.debugreg0;
+		ri.cpt_debugreg[1] = tsk->thread.debugreg1;
+		ri.cpt_debugreg[2] = tsk->thread.debugreg2;
+		ri.cpt_debugreg[3] = tsk->thread.debugreg3;
+		ri.cpt_debugreg[4] = 0;
+		ri.cpt_debugreg[5] = 0;
+		ri.cpt_debugreg[6] = tsk->thread.debugreg6;
+		ri.cpt_debugreg[7] = tsk->thread.debugreg7;
+		ri.cpt_fs = encode_segment(tsk->thread.fsindex);
+		ri.cpt_gs = encode_segment(tsk->thread.gsindex);
+
+		xlate_ptregs_64_to_32(&ri, task_pt_regs(tsk), tsk);
+
+		ctx->write(&ri, sizeof(ri), ctx);
+	} else {
+		struct cpt_x86_64_regs ri;
+		ri.cpt_next = sizeof(ri);
+		ri.cpt_object = CPT_OBJ_X86_64_REGS;
+		ri.cpt_hdrlen = sizeof(ri);
+		ri.cpt_content = CPT_CONTENT_VOID;
+
+		ri.cpt_fsbase = tsk->thread.fs;
+		ri.cpt_gsbase = tsk->thread.gs;
+		ri.cpt_fsindex = encode_segment(tsk->thread.fsindex);
+		ri.cpt_gsindex = encode_segment(tsk->thread.gsindex);
+		ri.cpt_ds = encode_segment(tsk->thread.ds);
+		ri.cpt_es = encode_segment(tsk->thread.es);
+		ri.cpt_debugreg[0] = tsk->thread.debugreg0;
+		ri.cpt_debugreg[1] = tsk->thread.debugreg1;
+		ri.cpt_debugreg[2] = tsk->thread.debugreg2;
+		ri.cpt_debugreg[3] = tsk->thread.debugreg3;
+		ri.cpt_debugreg[4] = 0;
+		ri.cpt_debugreg[5] = 0;
+		ri.cpt_debugreg[6] = tsk->thread.debugreg6;
+		ri.cpt_debugreg[7] = tsk->thread.debugreg7;
+
+		/* Bulk-copy pt_regs starting at the r15 field — assumes
+		 * cpt_x86_64_regs mirrors struct pt_regs from there on;
+		 * TODO confirm against cpt_image.h. */
+		memcpy(&ri.cpt_r15, task_pt_regs(tsk), sizeof(struct pt_regs));
+
+		ri.cpt_cs = encode_segment(task_pt_regs(tsk)->cs);
+		ri.cpt_ss = encode_segment(task_pt_regs(tsk)->ss);
+
+		ctx->write(&ri, sizeof(ri), ctx);
+	}
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+#else
+
+/* i386 variant: write the task's user register state as a
+ * CPT_OBJ_X86_REGS record. */
+static int dump_registers(task_t *tsk, struct cpt_context *ctx)
+{
+	struct cpt_x86_regs ri;
+
+	cpt_open_object(NULL, ctx);
+
+	ri.cpt_next = sizeof(ri);
+	ri.cpt_object = CPT_OBJ_X86_REGS;
+	ri.cpt_hdrlen = sizeof(ri);
+	ri.cpt_content = CPT_CONTENT_VOID;
+
+	ri.cpt_debugreg[0] = tsk->thread.debugreg[0];
+	ri.cpt_debugreg[1] = tsk->thread.debugreg[1];
+	ri.cpt_debugreg[2] = tsk->thread.debugreg[2];
+	ri.cpt_debugreg[3] = tsk->thread.debugreg[3];
+	ri.cpt_debugreg[4] = tsk->thread.debugreg[4];
+	ri.cpt_debugreg[5] = tsk->thread.debugreg[5];
+	ri.cpt_debugreg[6] = tsk->thread.debugreg[6];
+	ri.cpt_debugreg[7] = tsk->thread.debugreg[7];
+	ri.cpt_fs = encode_segment(tsk->thread.fs);
+	ri.cpt_gs = encode_segment(tsk->thread.gs);
+
+	/* Bulk-copy pt_regs starting at ebx — assumes cpt_x86_regs
+	 * mirrors struct pt_regs from there on; TODO confirm against
+	 * cpt_image.h. */
+	memcpy(&ri.cpt_ebx, task_pt_regs(tsk), sizeof(struct pt_regs));
+
+	ri.cpt_xcs = encode_segment(task_pt_regs(tsk)->xcs);
+	ri.cpt_xss = encode_segment(task_pt_regs(tsk)->xss);
+	ri.cpt_xds = encode_segment(task_pt_regs(tsk)->xds);
+	ri.cpt_xes = encode_segment(task_pt_regs(tsk)->xes);
+
+	ctx->write(&ri, sizeof(ri), ctx);
+	cpt_close_object(ctx);
+
+	return 0;
+}
+#endif
+
+/*
+ * Dump the used portion of the task's kernel stack (from the current
+ * kernel sp up to the stack base) as a CPT_OBJ_BITS record with
+ * CPT_CONTENT_STACK content, padded to the image alignment.
+ */
+static int dump_kstack(task_t *tsk, struct cpt_context *ctx)
+{
+	struct cpt_obj_bits hdr;
+	unsigned long size;
+	void *start;
+
+	cpt_open_object(NULL, ctx);
+
+#ifdef CONFIG_X86_64
+	size = tsk->thread.rsp0 - tsk->thread.rsp;
+	start = (void*)tsk->thread.rsp;
+#else
+	size = tsk->thread.esp0 - tsk->thread.esp;
+	start = (void*)tsk->thread.esp;
+#endif
+
+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(size);
+	hdr.cpt_object = CPT_OBJ_BITS;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = CPT_CONTENT_STACK;
+	hdr.cpt_size = size;
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	ctx->write(start, size, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+
+/* Formats of i387_fxsave_struct are the same for x86_64
+ * and i386. Plain luck. */
+
+/*
+ * Dump the task's FPU state as a CPT_OBJ_BITS record.  On i386 CPUs
+ * without FXSR the smaller legacy fsave layout is written and tagged
+ * CPT_CONTENT_X86_FPUSTATE_OLD so restore can tell the formats apart.
+ */
+static int dump_fpustate(task_t *tsk, struct cpt_context *ctx)
+{
+	struct cpt_obj_bits hdr;
+	unsigned long size;
+	int type;
+
+	cpt_open_object(NULL, ctx);
+
+	type = CPT_CONTENT_X86_FPUSTATE;
+	size = sizeof(struct i387_fxsave_struct);
+#ifndef CONFIG_X86_64
+	if (!cpu_has_fxsr) {
+		size = sizeof(struct i387_fsave_struct);
+		type = CPT_CONTENT_X86_FPUSTATE_OLD;
+	}
+#endif
+
+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(size);
+	hdr.cpt_object = CPT_OBJ_BITS;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = type;
+	hdr.cpt_size = size;
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	ctx->write(&tsk->thread.i387, size, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+
+/*
+ * Pack a kernel siginfo_t into the image's cpt_siginfo_image, reusing
+ * the generic pid/uid/sigval fields for the union members selected by
+ * the si_code class.  Kernel pids are translated to VE-virtual pids.
+ * Always returns 0.
+ */
+static int encode_siginfo(struct cpt_siginfo_image *si, siginfo_t *info)
+{
+	si->cpt_signo = info->si_signo;
+	si->cpt_errno = info->si_errno;
+	si->cpt_code = info->si_code;
+
+	switch(si->cpt_code & __SI_MASK) {
+	case __SI_TIMER:
+		/* Fields repurposed: pid<-tid, uid<-overrun, utime<-private. */
+		si->cpt_pid = info->si_tid;
+		si->cpt_uid = info->si_overrun;
+		si->cpt_sigval = cpt_ptr_export(info->_sifields._timer._sigval.sival_ptr);
+		si->cpt_utime = info->si_sys_private;
+		break;
+	case __SI_POLL:
+		si->cpt_pid = info->si_band;
+		si->cpt_uid = info->si_fd;
+		break;
+	case __SI_FAULT:
+		si->cpt_sigval = cpt_ptr_export(info->si_addr);
+#ifdef __ARCH_SI_TRAPNO
+		si->cpt_pid = info->si_trapno;
+#endif
+		break;
+	case __SI_CHLD:
+		si->cpt_pid = is_virtual_pid(info->si_pid) ? info->si_pid : pid_type_to_vpid(PIDTYPE_PID, info->si_pid);
+		si->cpt_uid = info->si_uid;
+		si->cpt_sigval = info->si_status;
+		si->cpt_stime = info->si_stime;
+		si->cpt_utime = info->si_utime;
+		break;
+	case __SI_KILL:
+	case __SI_RT:
+	case __SI_MESGQ:
+	default:
+		si->cpt_pid = is_virtual_pid(info->si_pid) ? info->si_pid : pid_type_to_vpid(PIDTYPE_TGID, info->si_pid);
+		si->cpt_uid = info->si_uid;
+		si->cpt_sigval = cpt_ptr_export(info->si_ptr);
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Write every queued signal of a sigpending list as a CPT_OBJ_SIGINFO
+ * record.  A no-op for an empty queue.  Returns 0 on success or
+ * -EINVAL if a siginfo cannot be encoded.
+ * NOTE(review): ctx->write return values are ignored here, as in the
+ * other dump_* helpers.
+ */
+static int dump_sigqueue(struct sigpending *list, struct cpt_context *ctx)
+{
+	struct sigqueue *q;
+	loff_t saved_obj;
+
+	if (list_empty(&list->list))
+		return 0;
+
+	cpt_push_object(&saved_obj, ctx);
+	list_for_each_entry(q, &list->list, list) {
+		struct cpt_siginfo_image si;
+
+		si.cpt_next = sizeof(si);
+		si.cpt_object = CPT_OBJ_SIGINFO;
+		si.cpt_hdrlen = sizeof(si);
+		si.cpt_content = CPT_CONTENT_VOID;
+
+		si.cpt_qflags = q->flags;
+		si.cpt_user = q->user->uid;
+
+		if (encode_siginfo(&si, &q->info))
+			return -EINVAL;
+
+		ctx->write(&si, sizeof(si), ctx);
+	}
+	cpt_pop_object(&saved_obj, ctx);
+	return 0;
+}
+
+
+
+/*
+ * Dump one shared signal_struct as a CPT_OBJ_SIGNAL_STRUCT record:
+ * process group / old tty pgrp / session (each translated to virtual
+ * pids and tagged NORMAL/ORPHAN/STRAY), controlling tty reference,
+ * pending shared signals and (on newer kernels) accounting fields.
+ * Returns 0 or -EINVAL on inconsistent ids.
+ */
+static int dump_one_signal_struct(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct signal_struct *sig = obj->o_obj;
+	struct cpt_signal_image *v = cpt_get_buf(ctx);
+	task_t *tsk;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SIGNAL_STRUCT;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	if (sig->pgrp <= 0) {
+		eprintk_ctx("bad pgid\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	/* Group is ORPHAN if no task with that pid exists any more. */
+	v->cpt_pgrp_type = CPT_PGRP_NORMAL;
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->pgrp);
+	if (tsk == NULL)
+		v->cpt_pgrp_type = CPT_PGRP_ORPHAN;
+	read_unlock(&tasklist_lock);
+	v->cpt_pgrp = pid_type_to_vpid(PIDTYPE_PGID, sig->pgrp);
+
+	v->cpt_old_pgrp = 0;
+	if (sig->tty_old_pgrp < 0) {
+		eprintk_ctx("bad tty_old_pgrp\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	if (sig->tty_old_pgrp > 0) {
+		v->cpt_old_pgrp_type = CPT_PGRP_NORMAL;
+		read_lock(&tasklist_lock);
+		tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->tty_old_pgrp);
+		if (tsk == NULL) {
+			v->cpt_old_pgrp_type = CPT_PGRP_ORPHAN;
+			tsk = find_task_by_pid_type_ve(PIDTYPE_PGID, sig->tty_old_pgrp);
+		}
+		read_unlock(&tasklist_lock);
+		if (tsk == NULL) {
+			eprintk_ctx("tty_old_pgrp does not exist anymore\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_old_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, sig->tty_old_pgrp);
+		if ((int)v->cpt_old_pgrp < 0) {
+			dprintk_ctx("stray tty_old_pgrp %d\n", sig->tty_old_pgrp);
+			v->cpt_old_pgrp = -1;
+			v->cpt_old_pgrp_type = CPT_PGRP_STRAY;
+		}
+	}
+
+	if (sig->session <= 0) {
+		eprintk_ctx("bad session\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	v->cpt_session_type = CPT_PGRP_NORMAL;
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->session);
+	if (tsk == NULL)
+		v->cpt_session_type = CPT_PGRP_ORPHAN;
+	read_unlock(&tasklist_lock);
+	v->cpt_session = pid_type_to_vpid(PIDTYPE_SID, sig->session);
+
+	v->cpt_leader = sig->leader;
+	v->cpt_ctty = CPT_NULL;
+	if (sig->tty) {
+		/* The controlling tty must already be in the image. */
+		cpt_object_t *cobj = lookup_cpt_object(CPT_OBJ_TTY, sig->tty, ctx);
+		if (cobj)
+			v->cpt_ctty = cobj->o_pos;
+		else {
+			eprintk_ctx("controlling tty is not found\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	memcpy(&v->cpt_sigpending, &sig->shared_pending.signal, 8);
+
+	v->cpt_curr_target = 0;
+	if (sig->curr_target)
+		v->cpt_curr_target = virt_pid(sig->curr_target);
+	v->cpt_group_exit = sig->group_exit;
+	v->cpt_group_exit_code = sig->group_exit_code;
+	v->cpt_group_exit_task = 0;
+	if (sig->group_exit_task)
+		v->cpt_group_exit_task = virt_pid(sig->group_exit_task);
+	v->cpt_notify_count = sig->notify_count;
+	v->cpt_group_stop_count = sig->group_stop_count;
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,8)
+	v->cpt_stop_state = sig->stop_state;
+ 
+	v->cpt_utime = sig->utime;
+	v->cpt_stime = sig->stime;
+	v->cpt_cutime = sig->cutime;
+	v->cpt_cstime = sig->cstime;
+	v->cpt_nvcsw = sig->nvcsw;
+	v->cpt_nivcsw = sig->nivcsw;
+	v->cpt_cnvcsw = sig->cnvcsw;
+	v->cpt_cnivcsw = sig->cnivcsw;
+	v->cpt_min_flt = sig->min_flt;
+	v->cpt_maj_flt = sig->maj_flt;
+	v->cpt_cmin_flt = sig->cmin_flt;
+	v->cpt_cmaj_flt = sig->cmaj_flt;
+#endif
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9)
+	/* Build-time trap if the image cannot hold all rlimits. */
+	if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+		__asm__("undefined\n");
+
+	/* NOTE(review): 'i' is not declared anywhere in this function,
+	 * so this branch cannot compile when enabled — needs 'int i;'. */
+	for (i=0; i<CPT_RLIM_NLIMITS; i++) {
+		if (i < RLIM_NLIMITS) {
+			v->cpt_rlim_cur[i] = sig->rlim[i].rlim_cur;
+			v->cpt_rlim_max[i] = sig->rlim[i].rlim_max;
+		} else {
+			v->cpt_rlim_cur[i] = CPT_NULL;
+			v->cpt_rlim_max[i] = CPT_NULL;
+		}
+	}
+#endif
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	dump_sigqueue(&sig->shared_pending, ctx);
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+
+static int dump_one_process(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	task_t *tsk = obj->o_obj;
+	int last_thread;
+	struct cpt_task_image *v = cpt_get_buf(ctx);
+	cpt_object_t *tobj;
+	cpt_object_t *tg_obj;
+	loff_t saved_obj;
+	int i;
+	int err;
+	struct timespec delta;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_signal = CPT_NULL;
+	tg_obj = lookup_cpt_object(CPT_OBJ_SIGNAL_STRUCT, tsk->signal, ctx);
+	if (!tg_obj) BUG();
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_TASK;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_state = tsk->state;
+	if (tsk->state == EXIT_ZOMBIE) {
+		eprintk_ctx("invalid zombie state on" CPT_FID "\n", CPT_TID(tsk));
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	} else if (tsk->state == EXIT_DEAD) {
+		if (tsk->exit_state != EXIT_DEAD &&
+		    tsk->exit_state != EXIT_ZOMBIE) {
+			eprintk_ctx("invalid exit_state %ld on" CPT_FID "\n", tsk->exit_state, CPT_TID(tsk));
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	if (tsk->exit_state) {
+		v->cpt_state = tsk->exit_state;
+		if (tsk->state != EXIT_DEAD) {
+			eprintk_ctx("invalid tsk->state %ld/%ld on" CPT_FID "\n",
+				tsk->state, tsk->exit_state, CPT_TID(tsk));
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	v->cpt_flags = tsk->flags&~PF_FROZEN;
+	v->cpt_ptrace = tsk->ptrace;
+	v->cpt_prio = tsk->prio;
+	v->cpt_exit_code = tsk->exit_code;
+	v->cpt_exit_signal = tsk->exit_signal;
+	v->cpt_pdeath_signal = tsk->pdeath_signal;
+	v->cpt_static_prio = tsk->static_prio;
+	v->cpt_rt_priority = tsk->rt_priority;
+	v->cpt_policy = tsk->policy;
+	if (v->cpt_policy != SCHED_NORMAL) {
+		eprintk_ctx("scheduler policy is not supported %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+
+	v->cpt_mm = CPT_NULL;
+	if (tsk->mm) {
+		tobj = lookup_cpt_object(CPT_OBJ_MM, tsk->mm, ctx);
+		if (!tobj) BUG();
+		v->cpt_mm = tobj->o_pos;
+	}
+	v->cpt_files = CPT_NULL;
+	if (tsk->files) {
+		tobj = lookup_cpt_object(CPT_OBJ_FILES, tsk->files, ctx);
+		if (!tobj) BUG();
+		v->cpt_files = tobj->o_pos;
+	}
+	v->cpt_fs = CPT_NULL;
+	if (tsk->fs) {
+		tobj = lookup_cpt_object(CPT_OBJ_FS, tsk->fs, ctx);
+		if (!tobj) BUG();
+		v->cpt_fs = tobj->o_pos;
+	}
+	v->cpt_namespace = CPT_NULL;
+	if (tsk->namespace) {
+		tobj = lookup_cpt_object(CPT_OBJ_NAMESPACE, tsk->namespace, ctx);
+		if (!tobj) BUG();
+		v->cpt_namespace = tobj->o_pos;
+
+		if (tsk->namespace != current->namespace)
+			eprintk_ctx("namespaces are not supported: process %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
+	}
+	v->cpt_sysvsem_undo = CPT_NULL;
+	if (tsk->sysvsem.undo_list && !tsk->exit_state) {
+		tobj = lookup_cpt_object(CPT_OBJ_SYSVSEM_UNDO, tsk->sysvsem.undo_list, ctx);
+		if (!tobj) BUG();
+		v->cpt_sysvsem_undo = tobj->o_pos;
+	}
+	v->cpt_sighand = CPT_NULL;
+	if (tsk->sighand) {
+		tobj = lookup_cpt_object(CPT_OBJ_SIGHAND_STRUCT, tsk->sighand, ctx);
+		if (!tobj) BUG();
+		v->cpt_sighand = tobj->o_pos;
+	}
+	v->cpt_sigblocked = cpt_sigset_export(&tsk->blocked);
+	v->cpt_sigrblocked = cpt_sigset_export(&tsk->real_blocked);
+	v->cpt_sigsuspend_blocked = cpt_sigset_export(&tsk->saved_sigset);
+
+	v->cpt_pid = virt_pid(tsk);
+	v->cpt_tgid = virt_tgid(tsk);
+	v->cpt_ppid = 0;
+	if (tsk->parent) {
+		if (tsk->parent != tsk->real_parent &&
+		    !lookup_cpt_object(CPT_OBJ_TASK, tsk->parent, ctx)) {
+			eprintk_ctx("task %d/%d(%s) is ptraced from ve0\n", tsk->pid, virt_pid(tsk), tsk->comm);
+			cpt_release_buf(ctx);
+			return -EBUSY;
+		}
+		v->cpt_ppid = virt_pid(tsk->parent);
+	}
+	v->cpt_rppid = tsk->real_parent ? virt_pid(tsk->real_parent) : 0;
+	v->cpt_pgrp = virt_pgid(tsk);
+	v->cpt_session = virt_sid(tsk);
+	v->cpt_old_pgrp = 0;
+	if (tsk->signal->tty_old_pgrp)
+		v->cpt_old_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, tsk->signal->tty_old_pgrp);
+	v->cpt_leader = tsk->group_leader ? virt_pid(tsk->group_leader) : 0;
+	v->cpt_set_tid = (unsigned long)tsk->set_child_tid;
+	v->cpt_clear_tid = (unsigned long)tsk->clear_child_tid;
+	memcpy(v->cpt_comm, tsk->comm, 16);
+	v->cpt_user = tsk->user->uid;
+	v->cpt_uid = tsk->uid;
+	v->cpt_euid = tsk->euid;
+	v->cpt_suid = tsk->suid;
+	v->cpt_fsuid = tsk->fsuid;
+	v->cpt_gid = tsk->gid;
+	v->cpt_egid = tsk->egid;
+	v->cpt_sgid = tsk->sgid;
+	v->cpt_fsgid = tsk->fsgid;
+	v->cpt_ngids = 0;
+	if (tsk->group_info && tsk->group_info->ngroups != 0) {
+		int i = tsk->group_info->ngroups;
+		if (i > 32) {
+			eprintk_ctx("too many of groups, truncated\n");
+			i = 32;
+		}
+		v->cpt_ngids = i;
+		for (i--; i>=0; i--)
+			v->cpt_gids[i] = tsk->group_info->small_block[i];
+	}
+	memcpy(&v->cpt_ecap, &tsk->cap_effective, 8);
+	memcpy(&v->cpt_icap, &tsk->cap_inheritable, 8);
+	memcpy(&v->cpt_pcap, &tsk->cap_permitted, 8);
+	v->cpt_keepcap = tsk->keep_capabilities;
+
+	v->cpt_did_exec = tsk->did_exec;
+	v->cpt_exec_domain = -1;
+	v->cpt_thrflags = tsk->thread_info->flags & ~(1<<TIF_FREEZE);
+	v->cpt_64bit = 0;
+#ifdef CONFIG_X86_64
+	/* Clear x86_64 specific flags */
+	v->cpt_thrflags &= ~(_TIF_FORK|_TIF_IA32);
+	if (!(tsk->thread_info->flags & _TIF_IA32)) {
+		ctx->tasks64++;
+		v->cpt_64bit = 1;
+	}
+#endif
+	v->cpt_thrstatus = tsk->thread_info->status;
+	v->cpt_addr_limit = -1;
+
+	v->cpt_personality = tsk->personality;
+
+	for (i=0; i<GDT_ENTRY_TLS_ENTRIES; i++) {
+		if (i>=3) {
+			eprintk_ctx("too many tls descs\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+#ifndef CONFIG_X86_64
+		v->cpt_tls[i] = (((u64)tsk->thread.tls_array[i].b)<<32) + tsk->thread.tls_array[i].a;
+#else
+		v->cpt_tls[i] = tsk->thread.tls_array[i];
+#endif
+	}
+
+	v->cpt_restart.fn = CPT_RBL_0;
+	if (tsk->thread_info->restart_block.fn != current->thread_info->restart_block.fn) {
+		long val;
+
+		if (tsk->thread_info->restart_block.fn != nanosleep_restart
+#ifdef CONFIG_X86_64
+		    && tsk->thread_info->restart_block.fn != compat_nanosleep_restart
+#endif
+		    ) {
+			eprintk_ctx("unknown restart block %p\n", tsk->thread_info->restart_block.fn);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_restart.fn = CPT_RBL_NANOSLEEP;
+#ifdef CONFIG_X86_64
+		if (tsk->thread_info->restart_block.fn == compat_nanosleep_restart)
+			v->cpt_restart.fn = CPT_RBL_COMPAT_NANOSLEEP;
+#endif
+		val = tsk->thread_info->restart_block.arg0 - (unsigned long)ctx->cpt_jiffies64;
+		if (val < 0)
+			val = 0;
+		v->cpt_restart.arg0 = (s64)val * TICK_NSEC;
+		v->cpt_restart.arg1 = 0;
+		v->cpt_restart.arg2 = tsk->thread_info->restart_block.arg1;
+		v->cpt_restart.arg3 = CLOCK_MONOTONIC;
+	}
+	v->cpt_it_real_incr = tsk->it_real_incr*TICK_NSEC;
+	v->cpt_it_prof_incr = tsk->it_prof_incr;
+	v->cpt_it_virt_incr = tsk->it_virt_incr;
+	v->cpt_it_real_value = 0;
+	if (timer_pending(&tsk->real_timer)) {
+		unsigned long val = tsk->real_timer.expires - 
+				(unsigned long)ctx->cpt_jiffies64;
+		if ((long)val <= 0)
+			val = 1;
+		v->cpt_it_real_value = (u64)val * TICK_NSEC;
+	}
+	v->cpt_it_prof_value = tsk->it_prof_value;
+	v->cpt_it_virt_value = tsk->it_virt_value;
+	v->cpt_used_math = tsk->used_math;
+
+	if (tsk->notifier) {
+		eprintk_ctx("task notifier is in use: process %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+
+	v->cpt_utime = tsk->utime;
+	v->cpt_stime = tsk->stime;
+	delta = tsk->start_time;
+	set_normalized_timespec(&delta,
+			delta.tv_sec - get_exec_env()->start_timespec.tv_sec,
+			delta.tv_nsec - get_exec_env()->start_timespec.tv_nsec);
+	v->cpt_starttime = cpt_timespec_export(&delta);
+	v->cpt_nvcsw = tsk->nvcsw;
+	v->cpt_nivcsw = tsk->nivcsw;
+	v->cpt_min_flt = tsk->min_flt;
+	v->cpt_maj_flt = tsk->maj_flt;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,8)
+	v->cpt_cutime = tsk->cutime;
+	v->cpt_cstime = tsk->cstime;
+	v->cpt_cnvcsw = tsk->cnvcsw;
+	v->cpt_cnivcsw = tsk->cnivcsw;
+	v->cpt_cmin_flt = tsk->cmin_flt;
+	v->cpt_cmaj_flt = tsk->cmaj_flt;
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,9)
+	if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+		__asm__("undefined\n");
+
+	for (i=0; i<CPT_RLIM_NLIMITS; i++) {
+		if (i < RLIM_NLIMITS) {
+			v->cpt_rlim_cur[i] = tsk->rlim[i].rlim_cur;
+			v->cpt_rlim_max[i] = tsk->rlim[i].rlim_max;
+		} else {
+			v->cpt_rlim_cur[i] = CPT_NULL;
+			v->cpt_rlim_max[i] = CPT_NULL;
+		}
+	}
+#endif
+
+	if (tsk->mm)
+		v->cpt_mm_ub = cpt_lookup_ubc(mm_ub(tsk->mm), ctx);
+	else
+		v->cpt_mm_ub = CPT_NULL;
+	v->cpt_task_ub = cpt_lookup_ubc(task_bc(tsk)->task_ub, ctx);
+	v->cpt_exec_ub = cpt_lookup_ubc(task_bc(tsk)->exec_ub, ctx);
+	v->cpt_fork_sub = cpt_lookup_ubc(task_bc(tsk)->fork_sub, ctx);
+
+	v->cpt_ptrace_message = tsk->ptrace_message;
+	v->cpt_pn_state = tsk->pn_state;
+	v->cpt_stopped_state = tsk->stopped_state;
+	v->cpt_sigsuspend_state = tsk->sigsuspend_state;
+
+#ifndef CONFIG_X86_64
+	if (tsk->thread.vm86_info) {
+		eprintk_ctx("vm86 task is running\n");
+		cpt_release_buf(ctx);
+		return -EBUSY;
+	}
+#endif
+
+	v->cpt_sigpending = cpt_sigset_export(&tsk->pending.signal);
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	dump_kstack(tsk, ctx);
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	err = dump_registers(tsk, ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	if (err)
+		return err;
+
+	if (tsk->used_math) {
+		cpt_push_object(&saved_obj, ctx);
+		dump_fpustate(tsk, ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+	if (tsk->last_siginfo) {
+		struct cpt_siginfo_image si;
+		cpt_push_object(&saved_obj, ctx);
+
+		si.cpt_next = sizeof(si);
+		si.cpt_object = CPT_OBJ_LASTSIGINFO;
+		si.cpt_hdrlen = sizeof(si);
+		si.cpt_content = CPT_CONTENT_VOID;
+
+		if (encode_siginfo(&si, tsk->last_siginfo))
+			return -EINVAL;
+
+		ctx->write(&si, sizeof(si), ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+	if (tsk->sas_ss_size) {
+		struct cpt_sigaltstack_image si;
+		cpt_push_object(&saved_obj, ctx);
+
+		si.cpt_next = sizeof(si);
+		si.cpt_object = CPT_OBJ_SIGALTSTACK;
+		si.cpt_hdrlen = sizeof(si);
+		si.cpt_content = CPT_CONTENT_VOID;
+
+		si.cpt_stack = tsk->sas_ss_sp;
+		si.cpt_stacksize = tsk->sas_ss_size;
+
+		ctx->write(&si, sizeof(si), ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+	dump_sigqueue(&tsk->pending, ctx);
+
+	last_thread = 1;
+	read_lock(&tasklist_lock);
+	do {
+		task_t * next = next_thread(tsk);
+		if (next != tsk && !thread_group_leader(next))
+			last_thread = 0;
+	} while (0);
+	read_unlock(&tasklist_lock);
+
+	if (last_thread) {
+		task_t *prev_tsk;
+		int err;
+		loff_t pos = ctx->file->f_pos;
+
+		cpt_push_object(&saved_obj, ctx);
+		err = dump_one_signal_struct(tg_obj, ctx);
+		cpt_pop_object(&saved_obj, ctx);
+		if (err)
+			return err;
+
+		prev_tsk = tsk;
+		for (;;) {
+			if (prev_tsk->tgid == tsk->tgid) {
+				loff_t tg_pos;
+
+				tg_pos = obj->o_pos + offsetof(struct cpt_task_image, cpt_signal);
+				ctx->pwrite(&pos, sizeof(pos), ctx, tg_pos);
+				if (thread_group_leader(prev_tsk))
+					break;
+			}
+
+			if (obj->o_list.prev == &ctx->object_array[CPT_OBJ_TASK]) {
+				eprintk_ctx("bug: thread group leader is lost\n");
+				return -EINVAL;
+			}
+
+			obj = list_entry(obj->o_list.prev, cpt_object_t, o_list);
+			prev_tsk = obj->o_obj;
+		}
+	}
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+int cpt_dump_tasks(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_TASKS);
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		int err;
+
+		if ((err = dump_one_process(obj, ctx)) != 0)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
+
+int cpt_collect_signals(cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	/* Collect process fd sets */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		if (tsk->signal && !list_empty(&tsk->signal->posix_timers)) {
+			eprintk_ctx("task %d/%d(%s) uses posix timers\n", tsk->pid, virt_pid(tsk), tsk->comm);
+			return -EBUSY;
+		}
+		if (tsk->signal && cpt_object_add(CPT_OBJ_SIGNAL_STRUCT, tsk->signal, ctx) == NULL)
+			return -ENOMEM;
+		if (tsk->sighand && cpt_object_add(CPT_OBJ_SIGHAND_STRUCT, tsk->sighand, ctx) == NULL)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+
+static int dump_one_sighand_struct(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct sighand_struct *sig = obj->o_obj;
+	struct cpt_sighand_image *v = cpt_get_buf(ctx);
+	int i;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SIGHAND_STRUCT;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	for (i=0; i< _NSIG; i++) {
+		if (sig->action[i].sa.sa_handler != SIG_DFL ||
+		    sig->action[i].sa.sa_flags) {
+			loff_t saved_obj;
+			struct cpt_sighandler_image *o = cpt_get_buf(ctx);
+
+			cpt_push_object(&saved_obj, ctx);
+			cpt_open_object(NULL, ctx);
+
+			o->cpt_next = CPT_NULL;
+			o->cpt_object = CPT_OBJ_SIGHANDLER;
+			o->cpt_hdrlen = sizeof(*o);
+			o->cpt_content = CPT_CONTENT_VOID;
+
+			o->cpt_signo = i;
+			o->cpt_handler = (unsigned long)sig->action[i].sa.sa_handler;
+			o->cpt_restorer = (unsigned long)sig->action[i].sa.sa_restorer;
+			o->cpt_flags = sig->action[i].sa.sa_flags;
+			memcpy(&o->cpt_mask, &sig->action[i].sa.sa_mask, 8);
+			ctx->write(o, sizeof(*o), ctx);
+			cpt_release_buf(ctx);
+			cpt_close_object(ctx);
+			cpt_pop_object(&saved_obj, ctx);
+		}
+	}
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+int cpt_dump_sighand(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_SIGHAND_STRUCT);
+
+	for_each_object(obj, CPT_OBJ_SIGHAND_STRUCT) {
+		int err;
+
+		if ((err = dump_one_sighand_struct(obj, ctx)) != 0)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_process.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_process.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_process.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_process.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,12 @@
+int cpt_collect_signals(cpt_context_t *);
+int cpt_dump_signal(struct cpt_context *);
+int cpt_dump_sighand(struct cpt_context *);
+int cpt_dump_tasks(struct cpt_context *);
+
+int rst_signal_complete(struct cpt_task_image *ti, int *exiting, struct cpt_context *ctx);
+__u32 rst_signal_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+
+int rst_restore_process(struct cpt_context *ctx);
+int rst_process_linkage(struct cpt_context *ctx);
+
+int check_task_state(task_t *tsk, struct cpt_context *ctx);
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_socket.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_socket.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_socket.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_socket.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,750 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <linux/tcp.h>
+#include <net/sock.h>
+#include <net/scm.h>
+#include <net/af_unix.h>
+#include <net/tcp.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_socket.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+
+static int dump_rqueue(int owner, struct sock *sk, struct cpt_context *ctx);
+
+
+/* Sockets are quite different of another kinds of files.
+ * There is one simplification: only one struct file can refer to a socket,
+ * so we could store information about socket directly in section FILES as
+ * a description of a file and append f.e. array of not-yet-accepted
+ * connections of listening socket as array of auxiliary data.
+ *
+ * Complications are:
+ * 1. TCP sockets can be orphans. We have to relocate orphans as well,
+ *    so we have to create special section for orphans.
+ * 2. AF_UNIX sockets are distinguished objects: set of links between
+ *    AF_UNIX sockets is quite arbitrary.
+ *    A. Each socket can refer to many files due to FD passing.
+ *    B. Each socket except for connected ones can have in queue skbs
+ *       sent by any of sockets.
+ *
+ *    2A is relatively easy: after our tasks are frozen we make an additional
+ *    recursive pass through the set of collected files and get references to
+ *    FD passed files. After end of recursion, all the files are treated
+ *    in the same way. All they will be stored in section FILES.
+ *
+ *    2B. We have to resolve all those references at some point.
+ *    It is the place where pipe-like approach to image fails.
+ *
+ * All this makes socket checkpointing quite cumbersome.
+ * Right now we collect all the sockets and assign some numeric index value
+ * to each of them. The socket section is separate and put after section FILES,
+ * so section FILES refers to sockets by index, section SOCKET refers to FILES
+ * as usual by position in image. All the refs inside socket section are
+ * by index. When restoring we read socket section, create objects to hold
+ * mappings index <-> pos. At the second pass we open sockets (simultaneously
+ * with their pairs) and create FILE objects.
+ */ 
+
+
+/* ====== FD passing ====== */
+
+/* Almost nobody does FD passing via AF_UNIX sockets, nevertheless we
+ * have to implement this. A problem is that in general case we receive
+ * skbs from an unknown context, so new files can arrive to checkpointed
+ * set of processes even after they are stopped. Well, we are going just
+ * to ignore unknown fds while doing real checkpointing. It is fair because
+ * links outside checkpointed set are going to fail anyway.
+ *
+ * ATTN: the procedure is recursive. We linearize the recursion adding
+ * newly found files to the end of file list, so they will be analyzed
+ * in the same loop.
+ */
+
+static int collect_one_passedfd(struct file *file, cpt_context_t * ctx)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct socket *sock;
+	struct sock *sk;
+	struct sk_buff *skb;
+
+	if (!inode->i_sock)
+		return -ENOTSOCK;
+
+	sock = &container_of(inode, struct socket_alloc, vfs_inode)->socket;
+
+	if (sock->ops->family != AF_UNIX)
+		return 0;
+
+	sk = sock->sk;
+
+	/* Subtle locking issue. skbs cannot be removed while
+	 * we are scanning, because all the processes are stopped.
+	 * They still can be added to tail of queue. Locking while
+	 * we dereference skb->next is enough to resolve this.
+	 * See above about collision with skbs added after we started
+	 * checkpointing.
+	 */
+
+	skb = skb_peek(&sk->sk_receive_queue);
+	while (skb && skb != (struct sk_buff*)&sk->sk_receive_queue) {
+		if (UNIXCB(skb).fp && skb->sk &&
+		    (!sock_flag(skb->sk, SOCK_DEAD) || unix_peer(sk) == skb->sk)) {
+			struct scm_fp_list *fpl = UNIXCB(skb).fp;
+			int i;
+
+			for (i = fpl->count-1; i >= 0; i--) {
+				if (cpt_object_add(CPT_OBJ_FILE, fpl->fp[i], ctx) == NULL)
+					return -ENOMEM;
+			}
+		}
+
+		spin_lock_irq(&sk->sk_receive_queue.lock);
+		skb = skb->next;
+		spin_unlock_irq(&sk->sk_receive_queue.lock);
+	}
+
+	return 0;
+}
+
+int cpt_collect_passedfds(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+
+		if (S_ISSOCK(file->f_dentry->d_inode->i_mode)) {
+			int err;
+
+			if ((err = collect_one_passedfd(file, ctx)) < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/* ====== End of FD passing ====== */
+
+/* Must be called under bh_lock_sock() */
+
+void clear_backlog(struct sock *sk)
+{
+	struct sk_buff *skb = sk->sk_backlog.head;
+
+	sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
+	while (skb) {
+		struct sk_buff *next = skb->next;
+
+		skb->next = NULL;
+		kfree_skb(skb);
+		skb = next;
+	}
+}
+
+void release_sock_nobacklog(struct sock *sk)
+{
+	spin_lock_bh(&(sk->sk_lock.slock));
+	clear_backlog(sk);
+	sk->sk_lock.owner = NULL;
+	if (waitqueue_active(&(sk->sk_lock.wq)))
+		wake_up(&(sk->sk_lock.wq));
+	spin_unlock_bh(&(sk->sk_lock.slock));
+}
+
+int cpt_dump_skb(int type, int owner, struct sk_buff *skb,
+		 struct cpt_context *ctx)
+{
+	struct cpt_skb_image *v = cpt_get_buf(ctx);
+	loff_t saved_obj;
+
+	cpt_push_object(&saved_obj, ctx);
+	cpt_open_object(NULL, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SKB;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_owner = owner;
+	v->cpt_queue = type;
+	v->cpt_stamp = cpt_timeval_export(&skb->stamp);
+	v->cpt_hspace = skb->data - skb->head;
+	v->cpt_tspace = skb->end - skb->tail;
+	v->cpt_h = skb->h.raw - skb->head;
+	v->cpt_nh = skb->nh.raw - skb->head;
+	v->cpt_mac = skb->mac.raw - skb->head;
+	if (sizeof(skb->cb) != sizeof(v->cpt_cb)) BUG();
+	memcpy(v->cpt_cb, skb->cb, sizeof(skb->cb));
+	v->cpt_len = skb->len;
+	v->cpt_mac_len = skb->mac_len;
+	v->cpt_csum = skb->csum;
+	v->cpt_local_df = skb->local_df;
+	v->cpt_pkt_type = skb->pkt_type;
+	v->cpt_ip_summed = skb->ip_summed;
+	v->cpt_priority = skb->priority;
+	v->cpt_protocol = skb->protocol;
+	v->cpt_tso_segs = skb_shinfo(skb)->tso_segs;
+	v->cpt_tso_size = skb_shinfo(skb)->tso_size;
+	v->cpt_security = skb->security;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	if (skb->len + (skb->data - skb->head) > 0) {
+		struct cpt_obj_bits ob;
+		loff_t saved_obj2;
+
+		cpt_push_object(&saved_obj2, ctx);
+		cpt_open_object(NULL, ctx);
+		ob.cpt_next = CPT_NULL;
+		ob.cpt_object = CPT_OBJ_BITS;
+		ob.cpt_hdrlen = sizeof(ob);
+		ob.cpt_content = CPT_CONTENT_DATA;
+		ob.cpt_size = skb->len + v->cpt_hspace;
+
+		ctx->write(&ob, sizeof(ob), ctx);
+
+		ctx->write(skb->head, (skb->data-skb->head) + (skb->len-skb->data_len), ctx);
+		if (skb->data_len) {
+			int offset = skb->len - skb->data_len;
+			while (offset < skb->len) {
+				int copy = skb->len - offset;
+				if (copy > PAGE_SIZE)
+					copy = PAGE_SIZE;
+				(void)cpt_get_buf(ctx);
+				if (skb_copy_bits(skb, offset, ctx->tmpbuf, copy))
+					BUG();
+				ctx->write(ctx->tmpbuf, copy, ctx);
+				__cpt_release_buf(ctx);
+				offset += copy;
+			}
+		}
+
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj2, ctx);
+	}
+
+	if (skb->sk && skb->sk->sk_family == AF_UNIX) {
+		struct scm_fp_list *fpl = UNIXCB(skb).fp;
+
+		if (fpl) {
+			int i;
+
+			for (i = 0; i < fpl->count; i++) {
+				struct cpt_fd_image v;
+				cpt_object_t *obj;
+				loff_t saved_obj2;
+
+				obj = lookup_cpt_object(CPT_OBJ_FILE, fpl->fp[i], ctx);
+
+				if (!obj) {
+					eprintk_ctx("lost passed FD\n");
+					return -EINVAL;
+				}
+
+				cpt_push_object(&saved_obj2, ctx);
+				cpt_open_object(NULL, ctx);
+				v.cpt_next = CPT_NULL;
+				v.cpt_object = CPT_OBJ_FILEDESC;
+				v.cpt_hdrlen = sizeof(v);
+				v.cpt_content = CPT_CONTENT_VOID;
+
+				v.cpt_fd = i;
+				v.cpt_file = obj->o_pos;
+				v.cpt_flags = 0;
+				ctx->write(&v, sizeof(v), ctx);
+				cpt_close_object(ctx);
+				cpt_pop_object(&saved_obj2, ctx);
+			}
+		}
+	}
+
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	return 0;
+}
+
+static int dump_rqueue(int idx, struct sock *sk, struct cpt_context *ctx)
+{
+	struct sk_buff *skb;
+	struct sock *sk_cache = NULL;
+
+	skb = skb_peek(&sk->sk_receive_queue);
+	while (skb && skb != (struct sk_buff*)&sk->sk_receive_queue) {
+		int err;
+
+		if (sk->sk_family == AF_UNIX) {
+			cpt_object_t *obj;
+			if (skb->sk != sk_cache) {
+				idx = -1;
+				sk_cache = NULL;
+				obj = lookup_cpt_object(CPT_OBJ_SOCKET, skb->sk, ctx);
+				if (obj) {
+					idx = obj->o_index;
+					sk_cache = skb->sk;
+				} else if (unix_peer(sk) != skb->sk)
+					goto next_skb;
+			}
+		}
+
+		err = cpt_dump_skb(CPT_SKB_RQ, idx, skb, ctx);
+		if (err)
+			return err;
+
+next_skb:
+		spin_lock_irq(&sk->sk_receive_queue.lock);
+		skb = skb->next;
+		spin_unlock_irq(&sk->sk_receive_queue.lock);
+	}
+	return 0;
+}
+
+static int dump_wqueue(int idx, struct sock *sk, struct cpt_context *ctx)
+{
+	struct sk_buff *skb;
+
+	skb = skb_peek(&sk->sk_write_queue);
+	while (skb && skb != (struct sk_buff*)&sk->sk_write_queue) {
+		int err = cpt_dump_skb(CPT_SKB_WQ, idx, skb, ctx);
+		if (err)
+			return err;
+
+		spin_lock_irq(&sk->sk_write_queue.lock);
+		skb = skb->next;
+		spin_unlock_irq(&sk->sk_write_queue.lock);
+	}
+	return 0;
+}
+
+void cpt_dump_sock_attr(struct sock *sk, cpt_context_t *ctx)
+{
+	if (sk->sk_filter) {
+		loff_t saved_obj;
+		struct cpt_obj_bits v;
+
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+
+		v.cpt_next = CPT_NULL;
+		v.cpt_object = CPT_OBJ_SKFILTER;
+		v.cpt_hdrlen = sizeof(v);
+		v.cpt_content = CPT_CONTENT_DATA;
+		v.cpt_size = sk->sk_filter->len*sizeof(struct sock_filter);
+
+		ctx->write(&v, sizeof(v), ctx);
+		ctx->write(sk->sk_filter->insns, v.cpt_size, ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+}
+
+/* Dump socket content */
+
+int cpt_dump_socket(cpt_object_t *obj, struct sock *sk, int index, int parent, struct cpt_context *ctx)
+{
+	struct cpt_sock_image *v = cpt_get_buf(ctx);
+	struct socket *sock;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SOCKET;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_file = CPT_NULL;
+	sock = sk->sk_socket;
+	if (sock && sock->file) {
+		cpt_object_t *tobj;
+		tobj = lookup_cpt_object(CPT_OBJ_FILE, sock->file, ctx);
+		if (tobj)
+			v->cpt_file = tobj->o_pos;
+	}
+	v->cpt_index = index;
+	v->cpt_parent = parent;
+
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) {
+		if (sock && !obj->o_lock) {
+			lock_sock(sk);
+			obj->o_lock = 1;
+		}
+	}
+
+	/* Some bits stored in inode */
+	v->cpt_ssflags = sock ? sock->flags : 0;
+	v->cpt_sstate = sock ? sock->state : 0;
+	v->cpt_passcred = sock ? sock->passcred : 0;
+
+	/* Common data */
+	v->cpt_family = sk->sk_family;
+	v->cpt_type = sk->sk_type;
+	v->cpt_state = sk->sk_state;
+	v->cpt_reuse = sk->sk_reuse;
+	v->cpt_zapped = sk->sk_zapped;
+	v->cpt_shutdown = sk->sk_shutdown;
+	v->cpt_userlocks = sk->sk_userlocks;
+	v->cpt_no_check = sk->sk_no_check;
+	v->cpt_debug = sk->sk_debug;
+	v->cpt_rcvtstamp = sk->sk_rcvtstamp;
+	v->cpt_localroute = sk->sk_localroute;
+	v->cpt_protocol = sk->sk_protocol;
+	v->cpt_err = sk->sk_err;
+	v->cpt_err_soft = sk->sk_err_soft;
+	v->cpt_max_ack_backlog = sk->sk_max_ack_backlog;
+	v->cpt_priority = sk->sk_priority;
+	v->cpt_rcvlowat = sk->sk_rcvlowat;
+	v->cpt_rcvtimeo = CPT_NULL;
+	if (sk->sk_rcvtimeo != MAX_SCHEDULE_TIMEOUT)
+		v->cpt_rcvtimeo = sk->sk_rcvtimeo > INT_MAX ? INT_MAX : sk->sk_rcvtimeo;
+	v->cpt_sndtimeo = CPT_NULL;
+	if (sk->sk_sndtimeo != MAX_SCHEDULE_TIMEOUT)
+		v->cpt_sndtimeo = sk->sk_sndtimeo > INT_MAX ? INT_MAX : sk->sk_sndtimeo;
+	v->cpt_rcvbuf = sk->sk_rcvbuf;
+	v->cpt_sndbuf = sk->sk_sndbuf;
+	v->cpt_bound_dev_if = sk->sk_bound_dev_if;
+	v->cpt_flags = sk->sk_flags;
+	v->cpt_lingertime = CPT_NULL;
+	if (sk->sk_lingertime != MAX_SCHEDULE_TIMEOUT)
+		v->cpt_lingertime = sk->sk_lingertime > INT_MAX ? INT_MAX : sk->sk_lingertime;
+	v->cpt_peer_pid = sk->sk_peercred.pid;
+	v->cpt_peer_uid = sk->sk_peercred.uid;
+	v->cpt_peer_gid = sk->sk_peercred.gid;
+	v->cpt_stamp = cpt_timeval_export(&sk->sk_stamp);
+
+	v->cpt_peer = -1;
+	v->cpt_socketpair = 0;
+	v->cpt_deleted = 0;
+
+	v->cpt_laddrlen = 0;
+	if (sock) {
+		int alen = sizeof(v->cpt_laddr);
+		int err = sock->ops->getname(sock, (struct sockaddr*)&v->cpt_laddr, &alen, 0);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		v->cpt_laddrlen = alen;
+	}
+	v->cpt_raddrlen = 0;
+	if (sock) {
+		int alen = sizeof(v->cpt_raddr);
+		int err = sock->ops->getname(sock, (struct sockaddr*)&v->cpt_raddr, &alen, 2);
+		if (!err)
+			v->cpt_raddrlen = alen;
+	}
+
+	if (sk->sk_family == AF_UNIX) {
+		if (unix_sk(sk)->dentry) {
+			struct dentry *d = unix_sk(sk)->dentry;
+			v->cpt_deleted = !IS_ROOT(d) && d_unhashed(d);
+			if (!v->cpt_deleted) {
+				int err = 0;
+				char *path;
+				unsigned long pg = __get_free_page(GFP_KERNEL);
+
+				if (!pg) {
+					cpt_release_buf(ctx);
+					return -ENOMEM;
+				}
+
+				path = d_path(d, unix_sk(sk)->mnt, (char *)pg, PAGE_SIZE);
+
+				if (!IS_ERR(path)) {
+					int len = strlen(path);
+					if (len < 126) {
+						strcpy(((char*)v->cpt_laddr)+2, path); 
+						v->cpt_laddrlen = len + 2;
+					} else {
+						wprintk_ctx("af_unix path is too long: %s (%s)\n", path, ((char*)v->cpt_laddr)+2);
+					}
+					err = cpt_verify_overmount(path, d, unix_sk(sk)->mnt, ctx);
+				} else {
+					eprintk_ctx("cannot get path of an af_unix socket\n");
+					err = PTR_ERR(path);
+				}
+				free_page(pg);
+				if (err) {
+					cpt_release_buf(ctx);
+					return err;
+				}
+			}
+		}
+
+		/* If the socket is connected, find its peer. If peer is not
+		 * in our table, the socket is connected to external process
+		 * and we consider it disconnected.
+		 */
+		if (unix_peer(sk)) {
+			cpt_object_t *pobj;
+			pobj = lookup_cpt_object(CPT_OBJ_SOCKET, unix_peer(sk), ctx);
+			if (pobj)
+				v->cpt_peer = pobj->o_index;
+			else
+				v->cpt_shutdown = SHUTDOWN_MASK;
+
+			if (unix_peer(unix_peer(sk)) == sk)
+				v->cpt_socketpair = 1;
+		}
+
+		/* If the socket shares address with another socket it is
+		 * child of some listening socket. Find and record it. */
+		if (unix_sk(sk)->addr &&
+		    atomic_read(&unix_sk(sk)->addr->refcnt) > 1 &&
+		    sk->sk_state != TCP_LISTEN) {
+			cpt_object_t *pobj;
+			for_each_object(pobj, CPT_OBJ_SOCKET) {
+				struct sock *psk = pobj->o_obj;
+				if (psk->sk_family == AF_UNIX &&
+				    psk->sk_state == TCP_LISTEN &&
+				    unix_sk(psk)->addr == unix_sk(sk)->addr) {
+					v->cpt_parent = pobj->o_index;
+					break;
+				}
+			}
+		}
+	}
+
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6)
+		cpt_dump_socket_in(v, sk, ctx);
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_dump_sock_attr(sk, ctx);
+
+	dump_rqueue(index, sk, ctx);
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) {
+		dump_wqueue(index, sk, ctx);
+		cpt_dump_ofo_queue(index, sk, ctx);
+	}
+
+	if ((sk->sk_family == AF_INET || sk->sk_family == AF_INET6)
+	    && sk->sk_state == TCP_LISTEN)
+		cpt_dump_synwait_queue(sk, index, ctx);
+
+	cpt_close_object(ctx);
+
+	if ((sk->sk_family == AF_INET || sk->sk_family == AF_INET6)
+	    && sk->sk_state == TCP_LISTEN)
+		cpt_dump_accept_queue(sk, index, ctx);
+
+	return 0;
+}
+
+int cpt_dump_orphaned_sockets(struct cpt_context *ctx)
+{
+	int i;
+
+	cpt_open_section(ctx, CPT_SECT_ORPHANS);
+
+	for (i = 0; i < tcp_ehash_size; i++) {
+		struct tcp_ehash_bucket *head = &tcp_ehash[i];
+		struct sock *sk;
+		struct hlist_node *node;
+
+retry:
+		read_lock_bh(&head->lock);
+		sk_for_each(sk, node, &head->chain) {
+
+			if (VE_OWNER_SK(sk) != get_exec_env())
+				continue;
+			if (sk->sk_socket)
+				continue;
+			if (!sock_flag(sk, SOCK_DEAD))
+				continue;
+			if (lookup_cpt_object(CPT_OBJ_SOCKET, sk, ctx))
+				continue;
+			sock_hold(sk);
+			read_unlock_bh(&head->lock);
+
+			local_bh_disable();
+			bh_lock_sock(sk);
+			if (sock_owned_by_user(sk))
+				eprintk_ctx("BUG: sk locked by whom?\n");
+			sk->sk_lock.owner = (void *)1;
+			bh_unlock_sock(sk);
+			local_bh_enable();
+
+			cpt_dump_socket(NULL, sk, -1, -1, ctx);
+
+			local_bh_disable();
+			bh_lock_sock(sk);
+			sk->sk_lock.owner = NULL;
+			clear_backlog(sk);
+			tcp_done(sk);
+			bh_unlock_sock(sk);
+			local_bh_enable();
+			sock_put(sk);
+
+			goto retry;
+		}
+		read_unlock_bh(&head->lock);
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+static int can_dump(struct sock *sk, cpt_context_t *ctx)
+{
+	switch (sk->sk_family) {
+	case AF_NETLINK:
+		if (((struct netlink_opt *)(sk->sk_protinfo))->cb) {
+			eprintk_ctx("netlink socket has active callback\n");
+			return 0;
+		}
+		break;
+	}
+	return 1;
+}
+
+/* We are not going to block suspend when we have external AF_UNIX connections.
+ * But we cannot stop feed of new packets/connections to our environment
+ * from outside. Taking into account that it is intrinsically unreliable,
+ * we collect some amount of data, but when checkpointing/restoring we
+ * are going to drop everything, which does not make sense: skbs sent
+ * by outside processes, connections from outside etc. etc.
+ */
+
+/* The first pass. When we see socket referenced by a file, we just
+ * add it to socket table */
+int cpt_collect_socket(struct file *file, cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+	struct socket *sock;
+	struct sock *sk;
+
+	if (!file->f_dentry->d_inode->i_sock)
+		return -ENOTSOCK;
+	sock = &container_of(file->f_dentry->d_inode, struct socket_alloc, vfs_inode)->socket;
+	sk = sock->sk;
+	if (!can_dump(sk, ctx))
+		return -EBUSY;
+	if ((obj = cpt_object_add(CPT_OBJ_SOCKET, sk, ctx)) == NULL)
+		return -ENOMEM;
+	obj->o_parent = file;
+
+	return 0;
+}
+
+/*
+ * We should end with table containing:
+ *  * all sockets opened by our processes in the table.
+ *  * all the sockets queued in listening queues on _our_ listening sockets,
+ *    which are connected to our opened sockets.
+ */
+
+static int collect_one_unix_listening_sock(cpt_object_t *obj, cpt_context_t * ctx)
+{
+	struct sock *sk = obj->o_obj;
+	cpt_object_t *cobj;
+	struct sk_buff *skb;
+
+	skb = skb_peek(&sk->sk_receive_queue);
+	while (skb && skb != (struct sk_buff*)&sk->sk_receive_queue) {
+		struct sock *lsk = skb->sk;
+		if (unix_peer(lsk) &&
+		    lookup_cpt_object(CPT_OBJ_SOCKET, unix_peer(lsk), ctx)) {
+			if ((cobj = cpt_object_add(CPT_OBJ_SOCKET, lsk, ctx)) == NULL)
+				return -ENOMEM;
+			cobj->o_parent = obj->o_parent;
+		}
+		spin_lock_irq(&sk->sk_receive_queue.lock);
+		skb = skb->next;
+		spin_unlock_irq(&sk->sk_receive_queue.lock);
+	}
+
+	return 0;
+}
+
+int cpt_index_sockets(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+	unsigned long index = 0;
+
+	/* Collect not-yet-accepted children of listening sockets. */
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+
+		if (sk->sk_state != TCP_LISTEN)
+			continue;
+
+		if (sk->sk_family == AF_UNIX)
+			collect_one_unix_listening_sock(obj, ctx);
+	}
+
+	/* Assign indices to all the sockets. */
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		cpt_obj_setindex(obj, index++, ctx);
+
+		if (sk->sk_socket && sk->sk_socket->file) {
+			cpt_object_t *tobj;
+			tobj = lookup_cpt_object(CPT_OBJ_FILE, sk->sk_socket->file, ctx);
+			if (tobj)
+				cpt_obj_setindex(tobj, obj->o_index, ctx);
+		}
+	}
+
+	return 0;
+}
+
+void cpt_unlock_sockets(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		if (sk && obj->o_lock) {
+			if (sk->sk_socket)
+				release_sock(sk);
+		}
+	}
+}
+
+void cpt_kill_sockets(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		if (sk && obj->o_lock) {
+			struct ve_struct *env = set_exec_env(sk->sk_owner_env);
+			cpt_kill_socket(sk, ctx);
+			if (sk->sk_socket)
+				release_sock_nobacklog(sk);
+			set_exec_env(env);
+		}
+	}
+}
+
+__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx)
+{
+	struct fasync_struct *fa;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct socket *sock;
+
+	sock = &container_of(inode, struct socket_alloc, vfs_inode)->socket;
+
+	for (fa = sock->fasync_list; fa; fa = fa->fa_next) {
+		if (fa->fa_file == file)
+			return fa->fa_fd;
+	}
+	return -1;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_socket.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_socket.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_socket.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_socket.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,27 @@
+struct sock;
+
+int cpt_collect_passedfds(cpt_context_t *);
+int cpt_index_sockets(cpt_context_t *);
+int cpt_collect_socket(struct file *, cpt_context_t *);
+int cpt_dump_socket(cpt_object_t *obj, struct sock *sk, int index, int parent, struct cpt_context *ctx);
+int cpt_dump_accept_queue(struct sock *sk, int index, struct cpt_context *ctx);
+int cpt_dump_synwait_queue(struct sock *sk, int index, struct cpt_context *ctx);
+int rst_sockets(struct cpt_context *ctx);
+int rst_sockets_complete(struct cpt_context *ctx);
+int cpt_dump_orphaned_sockets(struct cpt_context *ctx);
+
+int rst_sock_attr(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx);
+struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx);
+
+void cpt_unlock_sockets(cpt_context_t *);
+void cpt_kill_sockets(cpt_context_t *);
+
+
+int cpt_kill_socket(struct sock *, cpt_context_t *);
+int cpt_dump_socket_in(struct cpt_sock_image *, struct sock *, struct cpt_context*);
+int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *, struct cpt_context *ctx);
+__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx);
+int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *);
+int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si, loff_t pos, struct cpt_context *ctx);
+int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx);
+int cpt_dump_skb(int type, int owner, struct sk_buff *skb, struct cpt_context *ctx);
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_socket_in.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_socket_in.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_socket_in.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_socket_in.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,372 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/ipv6.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+
+static inline __u32 jiffies_export(unsigned long tmo)
+{
+	__s32 delta = (long)(tmo - jiffies);	/* store relative to "now" so it survives restore on another host */
+	return delta;
+}
+
+static inline __u32 tcp_jiffies_export(__u32 tmo)
+{
+	__s32 delta = tmo - tcp_time_stamp;	/* relative to the TCP timestamp clock, same rationale as jiffies_export() */
+	return delta;
+}
+
+int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx)
+{
+	struct sk_buff *skb;
+	struct tcp_opt *tp;
+
+	if (sk->sk_type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP)
+		return 0;	/* only TCP sockets have an out-of-order queue */
+
+	tp = tcp_sk(sk);
+
+	skb = skb_peek(&tp->out_of_order_queue);
+	while (skb && skb != (struct sk_buff*)&tp->out_of_order_queue) {	/* list head sentinel terminates the walk */
+		int err;
+
+		err = cpt_dump_skb(CPT_SKB_OFOQ, idx, skb, ctx);
+		if (err)
+			return err;
+
+		spin_lock_irq(&tp->out_of_order_queue.lock);	/* NOTE(review): lock held only while advancing, not while dumping; presumably safe because the VE is frozen -- confirm */
+		skb = skb->next;
+		spin_unlock_irq(&tp->out_of_order_queue.lock);
+	}
+	return 0;
+}
+
+static int cpt_dump_socket_tcp(struct cpt_sock_image *si, struct sock *sk,
+			       struct cpt_context *ctx)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+
+	si->cpt_pred_flags = tp->pred_flags;
+	si->cpt_rcv_nxt = tp->rcv_nxt;
+	si->cpt_snd_nxt = tp->snd_nxt;
+	si->cpt_snd_una = tp->snd_una;
+	si->cpt_snd_sml = tp->snd_sml;
+	si->cpt_rcv_tstamp = tcp_jiffies_export(tp->rcv_tstamp);
+	si->cpt_lsndtime = tcp_jiffies_export(tp->lsndtime);
+	si->cpt_tcp_header_len = tp->tcp_header_len;
+	si->cpt_ack_pending = tp->ack.pending;
+	si->cpt_quick = tp->ack.quick;
+	si->cpt_pingpong = tp->ack.pingpong;
+	si->cpt_blocked = tp->ack.blocked;
+	si->cpt_ato = tp->ack.ato;
+	si->cpt_ack_timeout = jiffies_export(tp->ack.timeout);
+	si->cpt_lrcvtime = tcp_jiffies_export(tp->ack.lrcvtime);
+	si->cpt_last_seg_size = tp->ack.last_seg_size;
+	si->cpt_rcv_mss = tp->ack.rcv_mss;
+	si->cpt_snd_wl1 = tp->snd_wl1;
+	si->cpt_snd_wnd = tp->snd_wnd;
+	si->cpt_max_window = tp->max_window;
+	si->cpt_pmtu_cookie = tp->pmtu_cookie;
+	si->cpt_mss_cache = tp->mss_cache;
+	si->cpt_mss_cache_std = tp->mss_cache_std;
+	si->cpt_mss_clamp = tp->rx_opt.mss_clamp;
+	si->cpt_ext_header_len = tp->ext_header_len;
+	si->cpt_ext2_header_len = tp->ext2_header_len;
+	si->cpt_ca_state = tp->ca_state;
+	si->cpt_retransmits = tp->retransmits;
+	si->cpt_reordering = tp->reordering;
+	si->cpt_frto_counter = tp->frto_counter;
+	si->cpt_frto_highmark = tp->frto_highmark;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+	si->cpt_adv_cong = tp->adv_cong;
+#endif
+	si->cpt_defer_accept = tp->defer_accept;
+	si->cpt_backoff = tp->backoff;
+	si->cpt_srtt = tp->srtt;
+	si->cpt_mdev = tp->mdev;
+	si->cpt_mdev_max = tp->mdev_max;
+	si->cpt_rttvar = tp->rttvar;
+	si->cpt_rtt_seq = tp->rtt_seq;
+	si->cpt_rto = tp->rto;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+	si->cpt_packets_out = tp->packets_out.val;
+	si->cpt_left_out = tp->left_out.val;
+	si->cpt_retrans_out = tp->retrans_out.val;
+	si->cpt_lost_out = tp->lost_out.val;
+	si->cpt_sacked_out = tp->sacked_out.val;
+	si->cpt_fackets_out = tp->fackets_out.val;
+#else
+	si->cpt_packets_out = tp->packets_out;
+	si->cpt_left_out = tp->left_out;
+	si->cpt_retrans_out = tp->retrans_out;
+	si->cpt_lost_out = tp->lost_out;
+	si->cpt_sacked_out = tp->sacked_out;
+	si->cpt_fackets_out = tp->fackets_out;
+#endif
+	si->cpt_snd_ssthresh = tp->snd_ssthresh;
+	si->cpt_snd_cwnd = tp->snd_cwnd;
+	si->cpt_snd_cwnd_cnt = tp->snd_cwnd_cnt;
+	si->cpt_snd_cwnd_clamp = tp->snd_cwnd_clamp;
+	si->cpt_snd_cwnd_used = tp->snd_cwnd_used;
+	si->cpt_snd_cwnd_stamp = tcp_jiffies_export(tp->snd_cwnd_stamp);
+	si->cpt_timeout = jiffies_export(tp->timeout);
+	si->cpt_ka_timeout = 0;
+	si->cpt_rcv_wnd = tp->rcv_wnd;
+	si->cpt_rcv_wup = tp->rcv_wup;
+	si->cpt_write_seq = tp->write_seq;
+	si->cpt_pushed_seq = tp->pushed_seq;
+	si->cpt_copied_seq = tp->copied_seq;
+	si->cpt_tstamp_ok = tp->rx_opt.tstamp_ok;
+	si->cpt_wscale_ok = tp->rx_opt.wscale_ok;
+	si->cpt_sack_ok = tp->rx_opt.sack_ok;
+	si->cpt_saw_tstamp = tp->rx_opt.saw_tstamp;
+	si->cpt_snd_wscale = tp->rx_opt.snd_wscale;
+	si->cpt_rcv_wscale = tp->rx_opt.rcv_wscale;
+	si->cpt_nonagle = tp->nonagle;
+	si->cpt_keepalive_probes = tp->keepalive_probes;
+	si->cpt_rcv_tsval = tp->rx_opt.rcv_tsval;
+	si->cpt_rcv_tsecr = tp->rx_opt.rcv_tsecr;
+	si->cpt_ts_recent = tp->rx_opt.ts_recent;
+	si->cpt_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
+	si->cpt_user_mss = tp->rx_opt.user_mss;
+	si->cpt_dsack = tp->rx_opt.dsack;
+	si->cpt_eff_sacks = tp->rx_opt.eff_sacks;
+	si->cpt_sack_array[0] = tp->duplicate_sack[0].start_seq;
+	si->cpt_sack_array[1] = tp->duplicate_sack[0].end_seq;
+	si->cpt_sack_array[2] = tp->selective_acks[0].start_seq;
+	si->cpt_sack_array[3] = tp->selective_acks[0].end_seq;
+	si->cpt_sack_array[4] = tp->selective_acks[1].start_seq;
+	si->cpt_sack_array[5] = tp->selective_acks[1].end_seq;
+	si->cpt_sack_array[6] = tp->selective_acks[2].start_seq;
+	si->cpt_sack_array[7] = tp->selective_acks[2].end_seq;
+	si->cpt_sack_array[8] = tp->selective_acks[3].start_seq;
+	si->cpt_sack_array[9] = tp->selective_acks[3].end_seq;
+	si->cpt_window_clamp = tp->window_clamp;
+	si->cpt_rcv_ssthresh = tp->rcv_ssthresh;
+	si->cpt_probes_out = tp->probes_out;
+	si->cpt_num_sacks = tp->rx_opt.num_sacks;
+	si->cpt_advmss = tp->advmss;
+	si->cpt_syn_retries = tp->syn_retries;
+	si->cpt_ecn_flags = tp->ecn_flags;
+	si->cpt_prior_ssthresh = tp->prior_ssthresh;
+	si->cpt_high_seq = tp->high_seq;
+	si->cpt_retrans_stamp = tp->retrans_stamp;
+	si->cpt_undo_marker = tp->undo_marker;
+	si->cpt_undo_retrans = tp->undo_retrans;
+	si->cpt_urg_seq = tp->urg_seq;
+	si->cpt_urg_data = tp->urg_data;
+	si->cpt_pending = tp->pending;
+	si->cpt_urg_mode = tp->urg_mode;
+	si->cpt_snd_up = tp->snd_up;
+	si->cpt_keepalive_time = tp->keepalive_time;
+	si->cpt_keepalive_intvl = tp->keepalive_intvl;
+	si->cpt_linger2 = tp->linger2;
+
+	if (sk->sk_state != TCP_LISTEN &&
+	    sk->sk_state != TCP_CLOSE &&
+	    sock_flag(sk, SOCK_KEEPOPEN)) {
+		si->cpt_ka_timeout = jiffies_export(sk->sk_timer.expires);
+	}
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	if (sk->sk_family == AF_INET6 && tp->af_specific == &ipv6_mapped)
+		si->cpt_mapped = 1;
+#endif
+
+	return 0;
+}
+
+
+int cpt_dump_socket_in(struct cpt_sock_image *si, struct sock *sk,
+		       struct cpt_context *ctx)
+{
+	struct inet_opt *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+
+	if (sk->sk_family == AF_INET) {
+		struct sockaddr_in *sin = ((struct sockaddr_in*)si->cpt_laddr);
+		sin->sin_family = AF_INET;
+		sin->sin_port = inet->sport;
+		sin->sin_addr.s_addr = inet->rcv_saddr;
+		si->cpt_laddrlen = sizeof(*sin);
+	} else if (sk->sk_family == AF_INET6) {
+		struct sockaddr_in6 *sin6 = ((struct sockaddr_in6*)si->cpt_laddr);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = inet->sport;
+		memcpy(&sin6->sin6_addr, &np->rcv_saddr, 16);
+		si->cpt_laddrlen = sizeof(*sin6);
+	}
+	if (!inet->num)
+		si->cpt_laddrlen = 0;
+
+	si->cpt_daddr = inet->daddr;
+	si->cpt_dport = inet->dport;
+	si->cpt_saddr = inet->saddr;
+	si->cpt_rcv_saddr = inet->rcv_saddr;
+	si->cpt_sport = inet->sport;
+	si->cpt_uc_ttl = inet->uc_ttl;
+	si->cpt_tos = inet->tos;
+	si->cpt_cmsg_flags = inet->cmsg_flags;
+	si->cpt_mc_index = inet->mc_index;
+	si->cpt_mc_addr = inet->mc_addr;
+	si->cpt_hdrincl = inet->hdrincl;
+	si->cpt_mc_ttl = inet->mc_ttl;
+	si->cpt_mc_loop = inet->mc_loop;
+	si->cpt_pmtudisc = inet->pmtudisc;
+	si->cpt_recverr = inet->recverr;
+	si->cpt_freebind = inet->freebind;
+	si->cpt_idcounter = inet->id;
+
+	si->cpt_cork_flags = inet->cork.flags;
+	si->cpt_cork_fragsize = 0;
+	si->cpt_cork_length = inet->cork.length;
+	si->cpt_cork_addr = inet->cork.addr;
+	si->cpt_cork_saddr = inet->cork.fl.fl4_src;
+	si->cpt_cork_daddr = inet->cork.fl.fl4_dst;
+	si->cpt_cork_oif = inet->cork.fl.oif;
+	if (inet->cork.rt) {
+		si->cpt_cork_fragsize = inet->cork.fragsize;
+		si->cpt_cork_saddr = inet->cork.rt->fl.fl4_src;
+		si->cpt_cork_daddr = inet->cork.rt->fl.fl4_dst;
+		si->cpt_cork_oif = inet->cork.rt->fl.oif;
+	}
+
+	if (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP) {
+		struct udp_opt *up = udp_sk(sk);
+		si->cpt_udp_pending  = up->pending;
+		si->cpt_udp_corkflag  = up->corkflag;
+		si->cpt_udp_encap  = up->encap_type;
+		si->cpt_udp_len  = up->len;
+	}
+
+	if (sk->sk_family == AF_INET6) {
+		memcpy(si->cpt_saddr6, &np->saddr, 16);
+		memcpy(si->cpt_rcv_saddr6, &np->rcv_saddr, 16);
+		memcpy(si->cpt_daddr6, &np->daddr, 16);
+		si->cpt_flow_label6 = np->flow_label;
+		si->cpt_frag_size6 = np->frag_size;
+		si->cpt_hop_limit6 = np->hop_limit;
+		si->cpt_mcast_hops6 = np->mcast_hops;
+		si->cpt_mcast_oif6 = np->mcast_oif;
+		si->cpt_rxopt6 = np->rxopt.all;
+		si->cpt_mc_loop6 = np->mc_loop;
+		si->cpt_recverr6 = np->recverr;
+		si->cpt_sndflow6 = np->sndflow;
+		si->cpt_pmtudisc6 = np->pmtudisc;
+		si->cpt_ipv6only6 = np->ipv6only;
+		si->cpt_mapped = 0;
+	}
+
+	if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP)
+		cpt_dump_socket_tcp(si, sk, ctx);
+
+	return 0;
+}
+
+int cpt_dump_accept_queue(struct sock *sk, int index, struct cpt_context *ctx)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+	struct open_request *req;
+
+	for (req=tp->accept_queue; req; req=req->dl_next)
+		cpt_dump_socket(NULL, req->sk, -1, index, ctx);	/* child socket: no own index, parent is 'index' */
+	return 0;
+}
+
+
+static int dump_openreq(struct open_request *req, struct sock *sk, int index,
+			struct cpt_context *ctx)
+{
+	struct cpt_openreq_image *v = cpt_get_buf(ctx);
+
+	cpt_open_object(NULL, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_OPENREQ;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	v->cpt_rcv_isn = req->rcv_isn;
+	v->cpt_snt_isn = req->snt_isn;
+	v->cpt_rmt_port = req->rmt_port;
+	v->cpt_mss = req->mss;
+	v->cpt_family = (req->class == &or_ipv4 ? AF_INET : AF_INET6);
+	v->cpt_retrans = req->retrans;
+	v->cpt_snd_wscale = req->snd_wscale;
+	v->cpt_rcv_wscale = req->rcv_wscale;
+	v->cpt_tstamp_ok = req->tstamp_ok;
+	v->cpt_sack_ok = req->sack_ok;
+	v->cpt_wscale_ok = req->wscale_ok;
+	v->cpt_ecn_ok = req->ecn_ok;
+	v->cpt_acked = req->acked;
+	v->cpt_window_clamp = req->window_clamp;
+	v->cpt_rcv_wnd = req->rcv_wnd;
+	v->cpt_ts_recent = req->ts_recent;
+	v->cpt_expires = jiffies_export(req->expires);
+
+	if (v->cpt_family == AF_INET) {
+		memcpy(v->cpt_loc_addr, &req->af.v4_req.loc_addr, 4);
+		memcpy(v->cpt_rmt_addr, &req->af.v4_req.rmt_addr, 4);
+	} else {
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		memcpy(v->cpt_loc_addr, &req->af.v6_req.loc_addr, 16);
+		memcpy(v->cpt_rmt_addr, &req->af.v6_req.rmt_addr, 16);
+		v->cpt_iif = req->af.v6_req.iif;
+#endif
+	}
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+int cpt_dump_synwait_queue(struct sock *sk, int index, struct cpt_context *ctx)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+	struct tcp_listen_opt *lopt = tp->listen_opt;
+	struct open_request *req;
+	int i;
+
+	for (i=0; i<TCP_SYNQ_HSIZE; i++) {	/* walk every bucket of the SYN hash table */
+		for (req=lopt->syn_table[i]; req; req=req->dl_next) {
+			loff_t saved_obj;
+			cpt_push_object(&saved_obj, ctx);	/* nest each openreq image under the socket */
+			dump_openreq(req, sk, index, ctx);
+			cpt_pop_object(&saved_obj, ctx);
+		}
+	}
+	return 0;
+}
+
+
+int cpt_kill_socket(struct sock *sk, cpt_context_t * ctx)
+{
+	if (sk->sk_state != TCP_CLOSE &&
+	    (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) &&
+	    sk->sk_protocol == IPPROTO_TCP) {	/* only live INET/INET6 TCP sockets need killing */
+		if (sk->sk_state != TCP_LISTEN)
+			tcp_set_state(sk, TCP_CLOSE);	/* abort an established/connecting socket */
+		else
+			sk->sk_prot->disconnect(sk, 0);	/* listener: use protocol disconnect to tear down queues */
+	}
+	return 0;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_syscalls.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_syscalls.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_syscalls.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_syscalls.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,95 @@
+#include <linux/unistd.h>
+#include <linux/syscalls.h>
+#include <asm/uaccess.h>
+
+#define WRAP(c, args) return sys_##c args
+#define WRAP2(c, args) int err; mm_segment_t oldfs; \
+	               oldfs = get_fs(); set_fs(KERNEL_DS); \
+                       err = sys_##c args ;\
+                       set_fs(oldfs); \
+                       return err
+
+static inline int sc_close(int fd)
+{
+	WRAP(close, (fd));	/* no user-space pointers: plain sys_ call */
+}
+
+static inline int sc_dup2(int fd1, int fd2)
+{
+	WRAP(dup2, (fd1, fd2));
+}
+
+static inline int sc_unlink(char *name)
+{
+	WRAP2(unlink, (name));	/* WRAP2 sets KERNEL_DS around the call: 'name' is a kernel pointer */
+}
+
+static inline int sc_pipe(int *pfd)
+{
+	return do_pipe(pfd);	/* use the in-kernel helper directly, no fs segment games needed */
+}
+
+static inline int sc_mknod(char *name, int mode, int dev)
+{
+	WRAP2(mknod, (name, mode, dev));
+}
+
+static inline int sc_chmod(char *name, int mode)
+{
+	WRAP2(chmod, (name, mode));	/* bug fix: previously called sys_mkdir (copy-paste from sc_mkdir) */
+}
+
+static inline int sc_chown(char *name, int uid, int gid)
+{
+	WRAP2(chown, (name, uid, gid));
+}
+
+static inline int sc_mkdir(char *name, int mode)
+{
+	WRAP2(mkdir, (name, mode));
+}
+
+static inline int sc_rmdir(char *name)
+{
+	WRAP2(rmdir, (name));
+}
+
+static inline int sc_mount(char *mntdev, char *mntpnt, char *type, unsigned long flags)
+{
+	WRAP2(mount, (mntdev ? : "none", mntpnt, type, flags, NULL));	/* NULL device becomes "none" */
+}
+
+static inline int sc_mprotect(unsigned long start, size_t len,
+			      unsigned long prot)
+{
+	WRAP(mprotect, (start, len, prot));
+}
+
+static inline int sc_mlock(unsigned long start, size_t len)
+{
+	WRAP(mlock, (start, len));
+}
+
+static inline int sc_munlock(unsigned long start, size_t len)
+{
+	WRAP(munlock, (start, len));
+}
+
+static inline int sc_remap_file_pages(unsigned long start, size_t len,
+				      unsigned long prot, unsigned long pgoff,
+				      unsigned long flags)
+{
+	WRAP(remap_file_pages, (start, len, prot, pgoff, flags));
+}
+
+static inline int sc_waitx(int pid, int opt)
+{
+	WRAP(wait4, (pid, NULL, opt, NULL));	/* status and rusage are discarded */
+}
+
+static inline int sc_flock(int fd, int flags)
+{
+	WRAP(flock, (fd, flags));
+}
+
+extern int sc_execve(char *cms, char **argv, char **env);
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_sysvipc.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_sysvipc.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_sysvipc.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_sysvipc.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,393 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/shm.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+
+struct _warg {
+		struct file			*file;
+		struct cpt_sysvshm_image	*v;
+};
+
+static int dump_one_shm(struct shmid_kernel *shp, void *arg)
+{
+	struct _warg *warg = arg;
+	struct cpt_sysvshm_image *v = (struct cpt_sysvshm_image *)warg->v;
+
+	if (shp->shm_file != warg->file)
+		return 0;
+
+	v->cpt_key = shp->shm_perm.key;
+	v->cpt_uid = shp->shm_perm.uid;
+	v->cpt_gid = shp->shm_perm.gid;
+	v->cpt_cuid = shp->shm_perm.cuid;
+	v->cpt_cgid = shp->shm_perm.cgid;
+	v->cpt_mode = shp->shm_perm.mode;
+	v->cpt_seq = shp->shm_perm.seq;
+
+	v->cpt_id = shp->id;
+	v->cpt_segsz = shp->shm_segsz;
+	v->cpt_atime = shp->shm_atim;
+	v->cpt_ctime = shp->shm_ctim;
+	v->cpt_dtime = shp->shm_dtim;
+	v->cpt_creator = shp->shm_cprid;
+	v->cpt_last = shp->shm_lprid;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+	v->cpt_mlockuser = shp->mlock_user ? shp->mlock_user->uid : -1;
+#else
+	v->cpt_mlockuser = -1;
+#endif
+	return 1;
+}
+
+int cpt_dump_content_sysvshm(struct file *file, struct cpt_context *ctx)
+{
+	struct cpt_sysvshm_image *v = cpt_get_buf(ctx);
+	struct _warg warg;
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_SYSV_SHM;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	warg.file = file;
+	warg.v = v;
+	if (sysvipc_walk_shm(dump_one_shm, &warg) == 0) {
+		cpt_release_buf(ctx);
+		return -ESRCH;
+	}
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+
+int match_sem(int id, struct sem_array *sema, void *arg)
+{
+	if (id != (unsigned long)arg)
+		return 0;	/* not the id we want: keep walking */
+	return sema->sem_nsems + 1;	/* +1 bias so nsems==0 is distinguishable from "not found" */
+}
+
+static int get_sem_nsem(int id, cpt_context_t *ctx)
+{
+	int res;
+	res = sysvipc_walk_sem(match_sem, (void*)(unsigned long)id);
+	if (res > 0)
+		return res - 1;	/* undo the +1 bias applied by match_sem() */
+	eprintk_ctx("get_sem_nsem: SYSV semaphore %d not found\n", id);
+	return -ESRCH;
+}
+
+static int dump_one_semundo(struct sem_undo *su, struct cpt_context *ctx)
+{
+	struct cpt_sysvsem_undo_image v;
+	loff_t saved_obj;
+
+	cpt_open_object(NULL, ctx);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_SYSVSEM_UNDO_REC;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_SEMUNDO;
+	v.cpt_id = su->semid;
+	v.cpt_nsem = get_sem_nsem(su->semid, ctx);
+	if ((int)v.cpt_nsem < 0)
+		return -ESRCH;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	ctx->write(su->semadj, v.cpt_nsem*sizeof(short), ctx);
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+struct sem_warg {
+	int				last_id;
+	struct cpt_sysvsem_image	*v;
+};
+
+static int dump_one_sem(int id, struct sem_array *sma, void *arg)
+{
+	struct sem_warg * warg = (struct sem_warg *)arg;
+	struct cpt_sysvsem_image *v = warg->v;
+	int i;
+
+	if (warg->last_id != -1) {
+		if ((id % IPCMNI) <= warg->last_id)
+			return 0;
+	}
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_SYSV_SEM;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_SEMARRAY;
+
+	v->cpt_key = sma->sem_perm.key;
+	v->cpt_uid = sma->sem_perm.uid;
+	v->cpt_gid = sma->sem_perm.gid;
+	v->cpt_cuid = sma->sem_perm.cuid;
+	v->cpt_cgid = sma->sem_perm.cgid;
+	v->cpt_mode = sma->sem_perm.mode;
+	v->cpt_seq = sma->sem_perm.seq;
+
+	v->cpt_id = id;
+	v->cpt_ctime = sma->sem_ctime;
+	v->cpt_otime = sma->sem_otime;
+
+	for (i=0; i<sma->sem_nsems; i++) {
+		struct {
+			__u32 semval;
+			__u32 sempid;
+		} *s = (void*)v + v->cpt_next;
+		if (v->cpt_next >= PAGE_SIZE - sizeof(*s))
+			return -EINVAL;
+		s->semval = sma->sem_base[i].semval;
+		s->sempid = sma->sem_base[i].sempid;
+		v->cpt_next += sizeof(*s);
+	}
+
+	warg->last_id = id % IPCMNI;
+	return 1;
+}
+
+
+int cpt_dump_sysvsem(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	struct sem_warg warg;
+
+	/* Dumping semaphores is quite tricky because we cannot
+	 * write to dump file under lock inside sysvipc_walk_sem().
+	 */
+	cpt_open_section(ctx, CPT_SECT_SYSV_SEM);
+	warg.last_id = -1;
+	warg.v = cpt_get_buf(ctx);
+	for (;;) {
+		if (sysvipc_walk_sem(dump_one_sem, &warg) <= 0)
+			break;
+		ctx->write(warg.v, warg.v->cpt_next, ctx);
+	}
+	cpt_release_buf(ctx);
+	cpt_close_section(ctx);
+
+	cpt_open_section(ctx, CPT_SECT_SYSVSEM_UNDO);
+	for_each_object(obj, CPT_OBJ_SYSVSEM_UNDO) {
+		struct sem_undo_list *semu = obj->o_obj;
+		struct sem_undo *su;
+		struct cpt_object_hdr v;
+		loff_t saved_obj;
+
+		cpt_open_object(obj, ctx);
+
+		v.cpt_next = CPT_NULL;
+		v.cpt_object = CPT_OBJ_SYSVSEM_UNDO;
+		v.cpt_hdrlen = sizeof(v);
+		v.cpt_content = CPT_CONTENT_ARRAY;
+
+		ctx->write(&v, sizeof(v), ctx);
+
+		cpt_push_object(&saved_obj, ctx);
+		for (su = semu->proc_list; su; su = su->proc_next) {
+			if (su->semid != -1) {
+				int err;
+				err = dump_one_semundo(su, ctx);
+				if (err < 0)
+					return err;
+			}
+		}
+		cpt_pop_object(&saved_obj, ctx);
+
+		cpt_close_object(ctx);
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+struct msg_warg {
+	int				last_id;
+	struct msg_queue		*msq;
+	struct cpt_sysvmsg_image	*v;
+};
+
+static int dump_one_msg(int id, struct msg_queue *msq, void *arg)
+{
+	struct msg_warg * warg = (struct msg_warg *)arg;
+	struct cpt_sysvmsg_image *v = warg->v;
+
+	if (warg->last_id != -1) {
+		if ((id % IPCMNI) <= warg->last_id)
+			return 0;
+	}
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_SYSVMSG;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_key = msq->q_perm.key;
+	v->cpt_uid = msq->q_perm.uid;
+	v->cpt_gid = msq->q_perm.gid;
+	v->cpt_cuid = msq->q_perm.cuid;
+	v->cpt_cgid = msq->q_perm.cgid;
+	v->cpt_mode = msq->q_perm.mode;
+	v->cpt_seq = msq->q_perm.seq;
+
+	v->cpt_id = id;
+	v->cpt_stime = msq->q_stime;
+	v->cpt_rtime = msq->q_rtime;
+	v->cpt_ctime = msq->q_ctime;
+	v->cpt_last_sender = msq->q_lspid;
+	v->cpt_last_receiver = msq->q_lrpid;
+	v->cpt_qbytes = msq->q_qbytes;
+
+	warg->msq = msq;
+	warg->last_id = id % IPCMNI;
+	return 1;
+}
+
+static int do_store(void * src, int len, int offset, void * data)
+{
+	cpt_context_t * ctx = data;
+	ctx->write(src, len, ctx);	/* stream the message segment straight into the dump image */
+	return 0;
+}
+
+static void cpt_dump_one_sysvmsg(struct msg_msg *m, cpt_context_t * ctx)
+{
+	loff_t saved_obj;
+	struct cpt_sysvmsg_msg_image mv;
+
+	cpt_open_object(NULL, ctx);
+	mv.cpt_next = CPT_NULL;
+	mv.cpt_object = CPT_OBJ_SYSVMSG_MSG;
+	mv.cpt_hdrlen = sizeof(mv);
+	mv.cpt_content = CPT_CONTENT_DATA;
+
+	mv.cpt_type = m->m_type;
+	mv.cpt_size = m->m_ts;
+
+	ctx->write(&mv, sizeof(mv), ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	sysv_msg_store(m, do_store, m->m_ts, ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	cpt_close_object(ctx);
+}
+
+int cpt_dump_sysvmsg(struct cpt_context *ctx)
+{
+	struct msg_warg warg;
+
+	/* Dumping msg queues is tricky because we cannot
+	 * write to dump file under lock inside sysvipc_walk_msg().
+	 *
+	 * And even worse, we have to access msg list in an unserialized
+	 * context. It is fragile. But VE is still frozen, remember?
+	 */
+	cpt_open_section(ctx, CPT_SECT_SYSV_MSG);
+	warg.last_id = -1;
+	warg.v = cpt_get_buf(ctx);
+	for (;;) {
+		loff_t saved_obj;
+		struct msg_msg * m;
+
+		if (sysvipc_walk_msg(dump_one_msg, &warg) <= 0)
+			break;
+
+		cpt_open_object(NULL, ctx);
+
+		ctx->write(warg.v, warg.v->cpt_next, ctx);
+
+		cpt_push_object(&saved_obj, ctx);
+		list_for_each_entry(m, &warg.msq->q_messages, m_list) {
+			cpt_dump_one_sysvmsg(m, ctx);
+		}
+		cpt_pop_object(&saved_obj, ctx);
+
+		cpt_close_object(ctx);
+	}
+	cpt_release_buf(ctx);
+	cpt_close_section(ctx);
+	return 0;
+}
+
+static int cpt_collect_sysvsem_undo(cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		if (tsk->exit_state) {
+			/* ipc/sem.c forgets to clear tsk->sysvsem.undo_list
+			 * on exit. Grrr... */
+			continue;
+		}
+		if (tsk->sysvsem.undo_list &&
+		    cpt_object_add(CPT_OBJ_SYSVSEM_UNDO, tsk->sysvsem.undo_list, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	for_each_object(obj, CPT_OBJ_SYSVSEM_UNDO) {
+		struct sem_undo_list *semu = obj->o_obj;
+
+		if (atomic_read(&semu->refcnt) != obj->o_count) {
+			eprintk_ctx("sem_undo_list is referenced outside %d %d\n", obj->o_count, atomic_read(&semu->refcnt));
+			return -EBUSY;
+		}
+	}
+	return 0;
+}
+
+static int collect_one_shm(struct shmid_kernel *shp, void *arg)
+{
+	cpt_context_t *ctx = arg;
+
+	if (__cpt_object_add(CPT_OBJ_FILE, shp->shm_file, GFP_ATOMIC, ctx) == NULL)	/* GFP_ATOMIC: invoked under the ipc walk lock */
+		return -ENOMEM;
+	return 0;
+}
+
+int cpt_collect_sysvshm(cpt_context_t * ctx)
+{
+	int err;
+
+	err = sysvipc_walk_shm(collect_one_shm, ctx);	/* register every shm segment's backing file */
+
+	return err < 0 ? err : 0;	/* walker returns a count on success; normalize to 0 */
+}
+
+int cpt_collect_sysv(cpt_context_t * ctx)
+{
+	int err;
+
+	err = cpt_collect_sysvsem_undo(ctx);	/* semaphore undo lists first */
+	if (err)
+		return err;
+	err = cpt_collect_sysvshm(ctx);	/* then shared-memory segments */
+	if (err)
+		return err;
+
+	return 0;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_tty.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_tty.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_tty.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_tty.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,205 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/tty.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/* We must support at least N_TTY. */
+
+int cpt_dump_content_tty(struct file *file, struct cpt_context *ctx)
+{
+	struct tty_struct *tty = file->private_data;
+	cpt_object_t *obj;
+	struct cpt_obj_ref o;
+	loff_t saved_pos;
+
+	obj = lookup_cpt_object(CPT_OBJ_TTY, tty, ctx);
+	if (!obj)
+		return -EINVAL;	/* tty was not collected earlier: inconsistent state */
+
+	cpt_push_object(&saved_pos, ctx);
+
+	o.cpt_next = sizeof(o);
+	o.cpt_object = CPT_OBJ_REF;	/* write a reference to the tty image, not the tty itself */
+	o.cpt_hdrlen = sizeof(o);
+	o.cpt_content = CPT_CONTENT_VOID;
+	o.cpt_pos = obj->o_pos;	/* file offset of the previously dumped tty object */
+	ctx->write(&o, sizeof(o), ctx);
+
+	cpt_pop_object(&saved_pos, ctx);
+
+	return 0;
+}
+
+int cpt_collect_tty(struct file *file, cpt_context_t * ctx)
+{
+	struct tty_struct *tty = file->private_data;
+
+	if (tty) {
+		if (cpt_object_add(CPT_OBJ_TTY, tty, ctx) == NULL)
+			return -ENOMEM;
+		if (tty->link) {	/* pty peer must be dumped too */
+			cpt_object_t *obj;
+
+			obj = cpt_object_add(CPT_OBJ_TTY, tty->link, ctx);
+			if (obj == NULL)
+				return -ENOMEM;
+			/* Undo o_count, tty->link is not a reference */
+			obj->o_count--;
+		}
+	}
+	return 0;
+}
+
+int cpt_dump_tty(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct tty_struct *tty = obj->o_obj;
+	struct cpt_tty_image *v;
+
+	if (tty->link) {
+		if (lookup_cpt_object(CPT_OBJ_TTY, tty->link, ctx) == NULL) {
+			eprintk_ctx("orphan pty %s %d\n", tty->name, tty->driver->subtype == PTY_TYPE_SLAVE);
+			return -EINVAL;
+		}
+		if (tty->link->link != tty) {
+			eprintk_ctx("bad pty pair\n");
+			return -EINVAL;
+		}
+		if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+		    tty->driver->subtype == PTY_TYPE_SLAVE &&
+		    tty->link->count)
+			obj->o_count++;
+	}
+	if (obj->o_count != tty->count) {
+		eprintk_ctx("tty %s is referenced outside %d %d\n", tty->name, obj->o_count, tty->count);
+		return -EBUSY;
+	}
+
+	cpt_open_object(obj, ctx);
+
+	v = cpt_get_buf(ctx);
+	v->cpt_next = -1;
+	v->cpt_object = CPT_OBJ_TTY;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_index = tty->index;
+	v->cpt_link = -1;
+	if (tty->link)
+		v->cpt_link = tty->link->index;
+	v->cpt_drv_type = tty->driver->type;
+	v->cpt_drv_subtype = tty->driver->subtype;
+	v->cpt_drv_flags = tty->driver->flags;
+	v->cpt_packet = tty->packet;
+	v->cpt_stopped = tty->stopped;
+	v->cpt_hw_stopped = tty->hw_stopped;
+	v->cpt_flow_stopped = tty->flow_stopped;
+	v->cpt_flags = tty->flags;
+	v->cpt_ctrl_status = tty->ctrl_status;
+	v->cpt_canon_data = tty->canon_data;
+	v->cpt_canon_head = tty->canon_head - tty->read_tail;
+	v->cpt_canon_column = tty->canon_column;
+	v->cpt_column = tty->column;
+	v->cpt_erasing = tty->erasing;
+	v->cpt_lnext = tty->lnext;
+	v->cpt_icanon = tty->icanon;
+	v->cpt_raw = tty->raw;
+	v->cpt_real_raw = tty->real_raw;
+	v->cpt_closing = tty->closing;
+	v->cpt_minimum_to_wake = tty->minimum_to_wake;
+	v->cpt_pgrp = 0;
+	if (tty->pgrp > 0) {
+		v->cpt_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, tty->pgrp);
+		if ((int)v->cpt_pgrp < 0) {
+			dprintk_ctx("cannot map tty->pgrp %d -> %d\n", tty->pgrp, (int)v->cpt_pgrp);
+			v->cpt_pgrp = -1;
+		}
+	}
+	v->cpt_session = 0;
+	if (tty->session > 0) {
+		v->cpt_session = _pid_type_to_vpid(PIDTYPE_SID, tty->session);
+		if ((int)v->cpt_session < 0) {
+			eprintk_ctx("cannot map tty->session %d -> %d\n", tty->session, (int)v->cpt_session);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	memcpy(v->cpt_name, tty->name, 64);
+	v->cpt_ws_row = tty->winsize.ws_row;
+	v->cpt_ws_col = tty->winsize.ws_col;
+	v->cpt_ws_prow = tty->winsize.ws_ypixel;
+	v->cpt_ws_pcol = tty->winsize.ws_xpixel;
+	if (tty->termios == NULL) {
+		eprintk_ctx("NULL termios");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	v->cpt_c_line = tty->termios->c_line;
+	v->cpt_c_iflag = tty->termios->c_iflag;
+	v->cpt_c_oflag = tty->termios->c_oflag;
+	v->cpt_c_cflag = tty->termios->c_cflag;
+	v->cpt_c_lflag = tty->termios->c_lflag;
+	memcpy(v->cpt_c_cc, tty->termios->c_cc, NCCS);
+	if (NCCS < 32)
+		memset(v->cpt_c_cc + NCCS, 255, 32 - NCCS);
+	memcpy(v->cpt_read_flags, tty->read_flags, sizeof(v->cpt_read_flags));
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	if (tty->read_buf && tty->read_cnt) {
+		struct cpt_obj_bits *v = cpt_get_buf(ctx);
+		loff_t saved_pos;
+
+		cpt_push_object(&saved_pos, ctx);
+		cpt_open_object(NULL, ctx);
+		v->cpt_next = CPT_NULL;
+		v->cpt_object = CPT_OBJ_BITS;
+		v->cpt_hdrlen = sizeof(*v);
+		v->cpt_content = CPT_CONTENT_DATA;
+		v->cpt_size = tty->read_cnt;
+		ctx->write(v, sizeof(*v), ctx);
+		cpt_release_buf(ctx);
+
+		if (tty->read_cnt) {
+			int n = min(tty->read_cnt, N_TTY_BUF_SIZE - tty->read_tail);
+			ctx->write(tty->read_buf + tty->read_tail, n, ctx);
+			if (tty->read_cnt > n)
+				ctx->write(tty->read_buf, tty->read_cnt-n, ctx);
+			ctx->align(ctx);
+		}
+
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_pos, ctx);
+	}
+
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+__u32 cpt_tty_fasync(struct file *file, struct cpt_context *ctx)
+{
+	struct tty_struct * tty;
+	struct fasync_struct *fa;
+
+	tty = (struct tty_struct *)file->private_data;
+
+	for (fa = tty->fasync; fa; fa = fa->fa_next) {
+		if (fa->fa_file == file)
+			return fa->fa_fd;	/* fd registered for async (SIGIO) notification */
+	}
+	return -1;	/* no fasync entry for this file */
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_ubc.c linux-2.6.9-ve023stab054/kernel/cpt/cpt_ubc.c
--- linux-2.6.9-100.orig/kernel/cpt/cpt_ubc.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_ubc.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,121 @@
+#include <linux/types.h>
+#include <ub/beancounter.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+cpt_object_t *cpt_add_ubc(struct user_beancounter *bc, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = cpt_object_add(CPT_OBJ_UBC, bc, ctx);
+	if (obj != NULL) {
+		if (obj->o_count == 1)
+			get_beancounter(bc);
+		if (bc->parent != NULL && obj->o_parent == NULL)
+			obj->o_parent = cpt_add_ubc(bc->parent, ctx);
+	}
+	return obj;
+}
+
+__u64 cpt_lookup_ubc(struct user_beancounter *bc, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = lookup_cpt_object(CPT_OBJ_UBC, bc, ctx);
+	if (obj == NULL) {
+		char buf[48];
+		print_ub_uid(bc, buf, sizeof(buf));
+		printk(KERN_ERR "CPT: unknown ub %s (%p)\n", buf, bc);
+		dump_stack();
+		return CPT_NULL;
+	}
+	return obj->o_pos;
+}
+
+static void dump_one_bc_parm(struct cpt_ubparm *dmp, struct ubparm *prm,
+		int held)
+{
+	dmp->barrier = (prm->barrier < UB_MAXVALUE ? prm->barrier : CPT_NULL);
+	dmp->limit = (prm->limit < UB_MAXVALUE ? prm->limit : CPT_NULL);
+	dmp->held = (held ? prm->held : CPT_NULL);
+	dmp->maxheld = prm->maxheld;
+	dmp->minheld = prm->minheld;
+	dmp->failcnt = prm->failcnt;
+}
+
+static int dump_one_bc(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct user_beancounter *bc;
+	struct cpt_beancounter_image *v;
+	int i;
+
+	bc = obj->o_obj;
+	v = cpt_get_buf(ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_UBC;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	if (obj->o_parent != NULL)
+		v->cpt_parent = ((cpt_object_t *)obj->o_parent)->o_pos;
+	else
+		v->cpt_parent = CPT_NULL;
+	v->cpt_id = (obj->o_parent != NULL) ? bc->ub_uid : 0;
+	for (i = 0; i < UB_RESOURCES; i++) {
+		dump_one_bc_parm(v->cpt_parms + i * 2, bc->ub_parms + i, 0);
+		dump_one_bc_parm(v->cpt_parms + i * 2 + 1, bc->ub_store + i, 1);
+	}
+	memset(v->cpt_parms + UB_RESOURCES * 2, 0,
+			sizeof(v->cpt_parms)
+				- UB_RESOURCES * 2 * sizeof(v->cpt_parms[0]));
+
+	cpt_open_object(obj, ctx);
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_close_object(ctx);
+
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+int cpt_dump_ubc(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int skipped;
+	int top;
+
+	cpt_open_section(ctx, CPT_SECT_UBC);
+
+	do {
+		skipped = 0;
+		top = 0;
+		for_each_object(obj, CPT_OBJ_UBC) {
+			if (obj->o_parent == NULL)
+				top++;
+			if (obj->o_pos != CPT_NULL)
+				continue;
+			if (obj->o_parent != NULL &&
+			    ((cpt_object_t *)obj->o_parent)->o_pos == CPT_NULL)
+				skipped++;
+			else
+				dump_one_bc(obj, ctx);
+		}
+	} while (skipped && (top < 2));
+
+	cpt_close_section(ctx);
+	if (top > 1) {
+		eprintk_ctx("More than one top level ub exist");
+		return -EINVAL;
+	}
+		
+	return 0;
+}
+
+void cpt_finish_ubc(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_UBC)
+		put_beancounter(obj->o_obj);
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_ubc.h linux-2.6.9-ve023stab054/kernel/cpt/cpt_ubc.h
--- linux-2.6.9-100.orig/kernel/cpt/cpt_ubc.h	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_ubc.h	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,11 @@
+cpt_object_t *cpt_add_ubc(struct user_beancounter *bc, struct cpt_context *ctx);
+__u64 cpt_lookup_ubc(struct user_beancounter *bc, struct cpt_context *ctx);
+int cpt_dump_ubc(struct cpt_context *ctx);
+
+struct user_beancounter *rst_lookup_ubc(__u64 pos, struct cpt_context *ctx);
+int rst_undump_ubc(struct cpt_context *ctx);
+
+void cpt_finish_ubc(struct cpt_context *ctx);
+void rst_finish_ubc(struct cpt_context *ctx);
+void copy_one_ubparm(struct ubparm *from, struct ubparm *to, int bc_parm_id);
+void set_one_ubparm_to_max(struct ubparm *ubprm, int bc_parm_id);
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/cpt_x8664.S linux-2.6.9-ve023stab054/kernel/cpt/cpt_x8664.S
--- linux-2.6.9-100.orig/kernel/cpt/cpt_x8664.S	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/cpt_x8664.S	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,65 @@
+#define ASSEMBLY 1
+#include <linux/config.h>
+
+#undef CONFIG_DEBUG_INFO
+	
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/dwarf2.h>
+#include <asm/calling.h>
+#include <asm/offset.h>
+#include <asm/msr.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+#include <asm/hw_irq.h>
+#include <asm/errno.h>
+
+	.code64
+
+	.macro FAKE_STACK_FRAME child_rip
+	/* push in order ss, rsp, eflags, cs, rip */
+	xorq %rax, %rax
+	pushq %rax /* ss */
+	pushq %rax /* rsp */
+	pushq $(1<<9) /* eflags - interrupts on */
+	pushq $__KERNEL_CS /* cs */
+	pushq \child_rip /* rip */
+	pushq	%rax /* orig rax */
+	.endm
+
+	.macro UNFAKE_STACK_FRAME
+	addq $8*6, %rsp
+	.endm
+
+ENTRY(asm_kernel_thread)
+	FAKE_STACK_FRAME $child_rip
+	SAVE_ALL
+
+	# rdi: flags, rsi: usp, rdx: will be &pt_regs
+	movq %rdx,%rdi
+	orq  $0x00800000,%rdi
+	movq $-1, %rsi
+	movq %rsp, %rdx
+
+	xorl %r8d,%r8d
+	xorl %r9d,%r9d
+	pushq %rcx
+	call do_fork_pid
+	addq $8, %rsp
+	/* store do_fork_pid()'s return value as the syscall return (RAX slot) */
+	movq %rax,RAX(%rsp)
+	xorl %edi,%edi
+	RESTORE_ALL
+	UNFAKE_STACK_FRAME
+	ret
+
+child_rip:
+	movq %rdi, %rax
+	movq %rsi, %rdi
+	call *%rax
+	xorq %rdi, %rdi
+	xorq %rsi, %rsi
+	call complete_and_exit
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_conntrack.c linux-2.6.9-ve023stab054/kernel/cpt/rst_conntrack.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_conntrack.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_conntrack.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,360 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/unistd.h>
+#include <linux/ve.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+
+#if defined(CONFIG_VE_IPTABLES) && \
+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+
+#define ASSERT_READ_LOCK(x) do { } while (0)
+#define ASSERT_WRITE_LOCK(x) do { } while (0)
+
+#include <linux/netfilter_ipv4/listhelp.h>
+
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+struct ct_holder
+{
+	struct ct_holder *next;
+	struct ip_conntrack *ct;
+	int index;
+};
+
+static void decode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple)
+{
+	tuple->dst.ip = v->cpt_dst;
+	tuple->dst.u.all = v->cpt_dstport;
+	tuple->dst.protonum = v->cpt_protonum;
+
+	tuple->src.ip = v->cpt_src;
+	tuple->src.u.all = v->cpt_srcport;
+}
+
+static void decode_nat_manip(struct cpt_nat_manip *v, struct ip_nat_info_manip *manip)
+{
+	manip->direction = v->cpt_direction;
+	manip->hooknum = v->cpt_hooknum;
+	manip->maniptype = v->cpt_maniptype;
+
+	manip->manip.ip = v->cpt_manip_addr;
+	manip->manip.u.all = v->cpt_manip_port;
+}
+
+
+static int undump_expect_list(struct ip_conntrack *ct,
+			      struct cpt_ip_conntrack_image *ci,
+			      loff_t pos, struct ct_holder *ct_list,
+			      cpt_context_t *ctx)
+{
+	loff_t end;
+	int err;
+
+	end = pos + ci->cpt_next;
+	pos += ci->cpt_hdrlen;
+	while (pos < end) {
+		struct cpt_ip_connexpect_image v;
+		struct ip_conntrack_expect *exp;
+		struct ip_conntrack *sibling;
+
+		err = rst_get_object(CPT_OBJ_NET_CONNTRACK_EXPECT, pos, &v, ctx);
+		if (err)
+			return err;
+
+		if (sizeof(v.cpt_help) != sizeof(union ip_conntrack_expect_help)) {
+			eprintk_ctx("conntrack exp->help version mismatch\n");
+			return -EINVAL;
+		}
+
+		sibling = NULL;
+		if (v.cpt_sibling_conntrack) {
+			struct ct_holder *c;
+
+			for (c = ct_list; c; c = c->next) {
+				if (c->index == v.cpt_sibling_conntrack) {
+					sibling = c->ct;
+					break;
+				}
+			}
+			if (!sibling) {
+				eprintk_ctx("lost sibling of expectation\n");
+				return -EINVAL;
+			}
+		}
+
+		WRITE_LOCK(&ip_conntrack_lock);
+
+		/* This can happen: the helper module may have just been unregistered;
+		 * had the expectation been on the list, it would have been destroyed. */
+		if (ct->helper == NULL) {
+			WRITE_UNLOCK(&ip_conntrack_lock);
+			dprintk_ctx("conntrack: no helper and non-trivial expectation\n");
+			continue;
+		}
+
+		exp = ip_conntrack_expect_alloc();
+		if (exp == NULL) {
+			WRITE_UNLOCK(&ip_conntrack_lock);
+			return -ENOMEM;
+		}
+
+		if (ct->helper->timeout && !del_timer(&exp->timeout)) {
+			/* Dying already. We can do nothing. */
+			WRITE_UNLOCK(&ip_conntrack_lock);
+			dprintk_ctx("conntrack expectation is dying\n");
+			continue;
+		}
+
+		decode_tuple(&v.cpt_ct_tuple, &exp->ct_tuple);
+		decode_tuple(&v.cpt_tuple, &exp->tuple);
+		decode_tuple(&v.cpt_mask, &exp->mask);
+
+		exp->seq = v.cpt_seq;
+
+		memcpy(&exp->help, &v.cpt_help, sizeof(exp->help));
+
+		ip_conntrack_expect_insert(exp, ct);
+		if (sibling) {
+			exp->sibling = sibling;
+			sibling->master = exp;
+			LIST_DELETE(&ve_ip_conntrack_expect_list, exp);
+			ct->expecting--;
+			nf_conntrack_get(&master_ct(sibling)->ct_general);
+		} else if (ct->helper->timeout) {
+			exp->timeout.expires = jiffies + v.cpt_timeout;
+			add_timer(&exp->timeout);
+		}
+		WRITE_UNLOCK(&ip_conntrack_lock);
+
+		pos += v.cpt_next;
+	}
+	return 0;
+}
+
+static int undump_one_ct(struct cpt_ip_conntrack_image *ci, loff_t pos,
+			 struct ct_holder **ct_list, cpt_context_t *ctx)
+{
+	int err = 0;
+	struct ip_conntrack *conntrack;
+	int i;
+	struct ct_holder *c;
+
+	c = kmalloc(sizeof(struct ct_holder), GFP_KERNEL);
+	if (c == NULL)
+		return -ENOMEM;
+
+	conntrack = ip_conntrack_alloc(get_exec_env()->_ip_conntrack->ub);
+	if (!conntrack || IS_ERR(conntrack)) {
+		kfree(c);
+		return -ENOMEM;
+	}
+
+	c->ct = conntrack;
+	c->next = *ct_list;
+	*ct_list = c;
+	c->index = ci->cpt_index;
+
+	decode_tuple(&ci->cpt_tuple[0], &conntrack->tuplehash[0].tuple);
+	conntrack->tuplehash[0].ctrack = conntrack;
+	decode_tuple(&ci->cpt_tuple[1], &conntrack->tuplehash[1].tuple);
+	conntrack->tuplehash[1].ctrack = conntrack;
+
+	conntrack->status = ci->cpt_status;
+
+	memcpy(&conntrack->proto, ci->cpt_proto_data, sizeof(conntrack->proto));
+	memcpy(&conntrack->help, ci->cpt_help_data, sizeof(conntrack->help));
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+	conntrack->nat.masq_index = ci->cpt_masq_index;
+#endif
+	if (ci->cpt_initialized) {
+		conntrack->nat.info.initialized = ci->cpt_initialized;
+		conntrack->nat.info.num_manips = ci->cpt_num_manips;
+		for (i=0; i<conntrack->nat.info.num_manips; i++)
+			decode_nat_manip(&ci->cpt_nat_manips[i], &conntrack->nat.info.manips[i]);
+		conntrack->nat.info.seq[0].correction_pos = ci->cpt_nat_seq[0].cpt_correction_pos;
+		conntrack->nat.info.seq[0].offset_before = ci->cpt_nat_seq[0].cpt_offset_before;
+		conntrack->nat.info.seq[0].offset_after = ci->cpt_nat_seq[0].cpt_offset_after;
+		conntrack->nat.info.seq[1].correction_pos = ci->cpt_nat_seq[1].cpt_correction_pos;
+		conntrack->nat.info.seq[1].offset_before = ci->cpt_nat_seq[1].cpt_offset_before;
+		conntrack->nat.info.seq[1].offset_after = ci->cpt_nat_seq[1].cpt_offset_after;
+		err = ip_nat_install_conntrack(conntrack, ci->cpt_nat_helper);
+		if (err) {
+			eprintk_ctx("conntrack: cannot find NAT helper, some module is not loaded\n");
+			conntrack->nat.info.initialized = 0;
+			conntrack->nat.info.num_manips = 0;
+		}
+	}
+#endif
+
+	WRITE_LOCK(&ip_conntrack_lock);
+
+	if (ci->cpt_ct_helper) {
+		conntrack->helper = ip_ct_find_helper(&conntrack->tuplehash[1].tuple);
+		if (conntrack->helper == NULL) {
+			eprintk_ctx("conntrack: cannot find helper, some module is not loaded\n");
+			err = -EINVAL;
+		}
+	}
+
+	atomic_inc(&(get_exec_env()->_ip_conntrack->_ip_conntrack_count));
+
+	ip_conntrack_hash_insert(conntrack);
+	conntrack->timeout.expires = jiffies + ci->cpt_timeout;
+
+	WRITE_UNLOCK(&ip_conntrack_lock);
+
+	if (err == 0 && ci->cpt_next > ci->cpt_hdrlen)
+		err = undump_expect_list(conntrack, ci, pos, *ct_list, ctx);
+
+	return err;
+}
+
+static unsigned char compat_state_map[] = {
+	TCP_CONNTRACK_NONE,
+	TCP_CONNTRACK_ESTABLISHED,
+	TCP_CONNTRACK_SYN_SENT,
+	TCP_CONNTRACK_SYN_RECV,
+	TCP_CONNTRACK_FIN_WAIT,
+	TCP_CONNTRACK_TIME_WAIT,
+	TCP_CONNTRACK_CLOSE,
+	TCP_CONNTRACK_CLOSE_WAIT,
+	TCP_CONNTRACK_LAST_ACK,
+	TCP_CONNTRACK_LISTEN,
+	TCP_CONNTRACK_MAX
+};
+
+static void
+upgrade_tcp_record(struct cpt_ip_conntrack_image *ci, cpt_context_t *ctx)
+{
+	__u32	old_state = ci->cpt_proto_data[0];
+#if 0
+	/* Right now it is of no use. */
+	__u32	old_ack = ci->cpt_proto_data[1];
+#endif
+	struct ip_ct_tcp * tct = (void*)ci->cpt_proto_data;
+
+	memset(ci->cpt_proto_data, 0, sizeof(ci->cpt_proto_data));
+	tct->state = TCP_CONNTRACK_NONE;
+	if (old_state < sizeof(compat_state_map))
+		tct->state = compat_state_map[old_state];
+	tct->seen[1].td_end	= tct->seen[0].td_end = 0;
+	tct->seen[1].td_maxend	= tct->seen[0].td_maxend = 0;
+	tct->seen[1].td_maxwin	= tct->seen[0].td_maxwin = 1;
+	tct->seen[1].td_scale	= tct->seen[0].td_scale = 0;
+	tct->seen[1].flags	= tct->seen[0].flags = IP_CT_TCP_FLAG_SACK_PERM;
+	tct->seen[1].loose	= tct->seen[0].loose = 3;
+	tct->last_index = 5;
+}
+
+static void
+upgrade_record(struct cpt_ip_conntrack_image *ci, cpt_context_t *ctx)
+{
+	struct cpt_ip_conntrack_image_0 *ci0 = (void*)ci;
+
+	ci->cpt_masq_index = ci0->cpt_masq_index;
+	memmove(ci->cpt_nat_seq, ci0->cpt_nat_seq, sizeof(ci->cpt_nat_seq));
+	memmove(ci->cpt_nat_manips, ci0->cpt_nat_manips, sizeof(ci->cpt_nat_manips));
+	ci->cpt_num_manips = ci0->cpt_num_manips;
+	ci->cpt_initialized = ci0->cpt_initialized;
+	memmove(ci->cpt_help_data, ci0->cpt_help_data, sizeof(ci->cpt_help_data));
+
+	if (ci->cpt_tuple[0].cpt_protonum == IPPROTO_TCP)
+		upgrade_tcp_record(ci, ctx);
+}
+
+int rst_restore_ip_conntrack(struct cpt_context * ctx)
+{
+	int err = 0;
+	loff_t sec = ctx->sections[CPT_SECT_NET_CONNTRACK];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_ip_conntrack_image ci;
+	struct ct_holder *c;
+	struct ct_holder *ct_list = NULL;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	if (sizeof(ci.cpt_proto_data) != sizeof(union ip_conntrack_proto) + 4) {
+		eprintk_ctx("conntrack module ct->proto version mismatch\n");
+		return -EINVAL;
+	}
+	if (sizeof(ci.cpt_help_data) != sizeof(union ip_conntrack_help)) {
+		eprintk_ctx("conntrack module ct->help version mismatch\n");
+		return -EINVAL;
+	}
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_CONNTRACK || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		err = rst_get_object(CPT_OBJ_NET_CONNTRACK, sec, &ci, ctx);
+		if (err)
+			break;
+		if (ctx->image_version < CPT_VERSION_9)
+			upgrade_record(&ci, ctx);
+		err = undump_one_ct(&ci, sec, &ct_list, ctx);
+		if (err)
+			break;
+		sec += ci.cpt_next;
+	}
+
+	while ((c = ct_list) != NULL) {
+		ct_list = c->next;
+		if (c->ct)
+			add_timer(&c->ct->timeout);
+		kfree(c);
+	}
+
+	return err;
+}
+
+#else
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+int rst_restore_ip_conntrack(struct cpt_context * ctx)
+{
+	if (ctx->sections[CPT_SECT_NET_CONNTRACK] != CPT_NULL)
+		return -EINVAL;
+	return 0;
+}
+
+#endif
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_context.c linux-2.6.9-ve023stab054/kernel/cpt/rst_context.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_context.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_context.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,302 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+static ssize_t file_read(void *addr, size_t count, struct cpt_context *ctx)
+{
+	mm_segment_t oldfs;
+	ssize_t err = -EBADF;
+	struct file *file = ctx->file;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if (file)
+		err = file->f_op->read(file, addr, count, &file->f_pos);
+	set_fs(oldfs);
+	if (err != count)
+		return err >= 0 ? -EIO : err;
+	return 0;
+}
+
+static ssize_t file_pread(void *addr, size_t count, struct cpt_context *ctx, loff_t pos)
+{
+	mm_segment_t oldfs;
+	ssize_t err = -EBADF;
+	struct file *file = ctx->file;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if (file)
+		err = file->f_op->read(file, addr, count, &pos);
+	set_fs(oldfs);
+	if (err != count)
+		return err >= 0 ? -EIO : err;
+	return 0;
+}
+
+static void file_align(struct cpt_context *ctx)
+{
+	struct file *file = ctx->file;
+
+	if (file)
+		file->f_pos = CPT_ALIGN(file->f_pos);
+}
+
+int rst_get_section(int type, struct cpt_context *ctx, loff_t *start, loff_t *end)
+{
+	struct cpt_section_hdr hdr;
+	int err;
+	loff_t pos;
+
+	pos = ctx->sections[type];
+	*start = *end = pos;
+
+	if (pos != CPT_NULL) {
+		if ((err = ctx->pread(&hdr, sizeof(hdr), ctx, pos)) != 0)
+			return err;
+		if (hdr.cpt_section != type || hdr.cpt_hdrlen < sizeof(hdr))
+			return -EINVAL;
+		*start = pos + hdr.cpt_hdrlen;
+		*end = pos + hdr.cpt_next;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(rst_get_section);
+
+void rst_context_init(struct cpt_context *ctx)
+{
+	int i;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	init_MUTEX(&ctx->main_sem);
+	ctx->refcount = 1;
+
+	ctx->current_section = -1;
+	ctx->current_object = -1;
+	ctx->pagesize = PAGE_SIZE;
+	ctx->read = file_read;
+	ctx->pread = file_pread;
+	ctx->align = file_align;
+	for (i=0; i < CPT_SECT_MAX; i++)
+		ctx->sections[i] = CPT_NULL;
+	init_completion(&ctx->pgin_notify);
+	cpt_object_init(ctx);
+}
+
+static int parse_sections(loff_t start, loff_t end, cpt_context_t *ctx)
+{
+	struct cpt_section_hdr h;
+
+	while (start < end) {
+		int err;
+
+		err = ctx->pread(&h, sizeof(h), ctx, start);
+		if (err)
+			return err;
+		if (h.cpt_hdrlen < sizeof(h) ||
+		    h.cpt_next < h.cpt_hdrlen ||
+		    start + h.cpt_next > end)
+			return -EINVAL;
+		if (h.cpt_section >= CPT_SECT_MAX)
+			return -EINVAL;
+		ctx->sections[h.cpt_section] = start;
+		start += h.cpt_next;
+	}
+	return 0;
+}
+
+int rst_open_dumpfile(struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_major_tail *v;
+	struct cpt_major_hdr  h;
+	unsigned long size;
+
+	err = -EBADF;
+	if (!ctx->file)
+		goto err_out;
+
+	err = -ENOMEM;
+	ctx->tmpbuf = (char*)__get_free_page(GFP_KERNEL);
+	if (ctx->tmpbuf == NULL)
+		goto err_out;
+	__cpt_release_buf(ctx);
+
+	size = ctx->file->f_dentry->d_inode->i_size;
+
+	if (size & 7) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	if (size < sizeof(struct cpt_major_hdr) +
+	    sizeof(struct cpt_major_tail)) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	err = ctx->pread(&h, sizeof(h), ctx, 0);
+	if (err) {
+		eprintk_ctx("too short image 1 %d\n", err);
+		goto err_out;
+	}
+	if (h.cpt_signature[0] != CPT_SIGNATURE0 ||
+	    h.cpt_signature[1] != CPT_SIGNATURE1 ||
+	    h.cpt_signature[2] != CPT_SIGNATURE2 ||
+	    h.cpt_signature[3] != CPT_SIGNATURE3) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	ctx->virt_jiffies64 = h.cpt_start_jiffies64;
+	ctx->start_time.tv_sec = h.cpt_start_sec;
+	ctx->start_time.tv_nsec = h.cpt_start_nsec;
+	ctx->kernel_config_flags = h.cpt_kernel_config[0];
+	ctx->iptables_mask = h.cpt_iptables_mask;
+	if (h.cpt_image_version > CPT_VERSION_9_1) {
+		eprintk_ctx("Unknown image version: %x. Can't restore.\n",
+				h.cpt_image_version);
+		err = -EINVAL;
+		goto err_out;
+	}
+	ctx->image_version = h.cpt_image_version;
+	ctx->features = (__u64)((__u64)h.cpt_ve_features2<<32 | h.cpt_ve_features);
+
+	v = cpt_get_buf(ctx);
+	err = ctx->pread(v, sizeof(*v), ctx, size - sizeof(*v));
+	if (err) {
+		eprintk_ctx("too short image 2 %d\n", err);
+		cpt_release_buf(ctx);
+		goto err_out;
+	}
+	if (v->cpt_signature[0] != CPT_SIGNATURE0 ||
+	    v->cpt_signature[1] != CPT_SIGNATURE1 ||
+	    v->cpt_signature[2] != CPT_SIGNATURE2 ||
+	    v->cpt_signature[3] != CPT_SIGNATURE3 ||
+	    v->cpt_nsect != CPT_SECT_MAX_INDEX) {
+		err = -EINVAL;
+		cpt_release_buf(ctx);
+		goto err_out;
+	}
+	if ((err = parse_sections(h.cpt_hdrlen, size - sizeof(*v) - sizeof(struct cpt_section_hdr), ctx)) < 0) {
+		cpt_release_buf(ctx);
+		goto err_out;
+	}
+	ctx->lazypages = v->cpt_lazypages;
+	ctx->tasks64 = v->cpt_64bit;
+	cpt_release_buf(ctx);
+	return 0;
+
+err_out:
+	if (ctx->tmpbuf) {
+		free_page((unsigned long)ctx->tmpbuf);
+		ctx->tmpbuf = NULL;
+	}
+	return err;
+}
+
+void rst_close_dumpfile(struct cpt_context *ctx)
+{
+	if (ctx->file) {
+		fput(ctx->file);
+		ctx->file = NULL;
+	}
+	if (ctx->tmpbuf) {
+		free_page((unsigned long)ctx->tmpbuf);
+		ctx->tmpbuf = NULL;
+	}
+}
+
+int _rst_get_object(int type, loff_t pos, void *tmp, int size, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_object_hdr *hdr = tmp;
+	err = ctx->pread(hdr, sizeof(struct cpt_object_hdr), ctx, pos);
+	if (err)
+		return err;
+	if (type > 0 && type != hdr->cpt_object)
+		return -EINVAL;
+	if (hdr->cpt_hdrlen > hdr->cpt_next)
+		return -EINVAL;
+	if (hdr->cpt_hdrlen < sizeof(struct cpt_object_hdr))
+		return -EINVAL;
+	if (size < sizeof(*hdr))
+		return -EINVAL;
+	if (size > hdr->cpt_hdrlen)
+		size = hdr->cpt_hdrlen;
+	if (size > sizeof(*hdr))
+		err = ctx->pread(hdr+1, size - sizeof(*hdr),
+				 ctx, pos + sizeof(*hdr));
+	return err;
+}
+EXPORT_SYMBOL(_rst_get_object);
+
+void * __rst_get_object(int type, loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	void *tmp;
+	struct cpt_object_hdr hdr;
+	err = ctx->pread(&hdr, sizeof(hdr), ctx, pos);
+	if (err)
+		return NULL;
+	if (type > 0 && type != hdr.cpt_object)
+		return NULL;
+	if (hdr.cpt_hdrlen > hdr.cpt_next)
+		return NULL;
+	if (hdr.cpt_hdrlen < sizeof(struct cpt_object_hdr))
+		return NULL;
+	tmp = kmalloc(hdr.cpt_hdrlen, GFP_KERNEL);
+	if (!tmp)
+		return NULL;
+	err = ctx->pread(tmp, hdr.cpt_hdrlen, ctx, pos);
+	if (!err)
+		return tmp;
+	kfree(tmp);
+	return NULL;
+}
+EXPORT_SYMBOL(__rst_get_object);
+
+__u8 *__rst_get_name(loff_t *pos_p, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_object_hdr hdr;
+	__u8 *name;
+
+	err = rst_get_object(CPT_OBJ_NAME, *pos_p, &hdr, ctx);
+	if (err)
+		return NULL;
+	if (hdr.cpt_next - hdr.cpt_hdrlen > PAGE_SIZE)
+		return NULL;
+	name = (void*)__get_free_page(GFP_KERNEL);
+	if (!name)
+		return NULL;
+	err = ctx->pread(name, hdr.cpt_next - hdr.cpt_hdrlen,
+		   ctx, *pos_p + hdr.cpt_hdrlen);
+	if (err) {
+		free_page((unsigned long)name);
+		return NULL;
+	}
+	*pos_p += hdr.cpt_next;
+	return name;
+}
+
+__u8 *rst_get_name(loff_t pos, struct cpt_context *ctx)
+{
+	return __rst_get_name(&pos, ctx);
+}
+
+void rst_put_name(__u8 *name, struct cpt_context *ctx)
+{
+	unsigned long addr = (unsigned long)name;
+
+	if (addr)
+		free_page(addr&~(PAGE_SIZE-1));
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_epoll.c linux-2.6.9-ve023stab054/kernel/cpt/rst_epoll.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_epoll.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_epoll.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,162 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/eventpoll.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+/* These functions are static in fs/eventpoll.c */
+extern struct file_operations eventpoll_fops;
+extern int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+		     struct file *tfile, int fd);
+extern struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
+extern void ep_release_epitem(struct epitem *epi);
+
+
+struct file *cpt_open_epolldev(struct cpt_file_image *fi,
+			       unsigned flags,
+			       struct cpt_context *ctx)
+{
+	struct file *file;
+	int efd;
+
+	/* Argument "size" is ignored, use just 1 */
+	efd = sys_epoll_create(1);
+	if (efd < 0)
+		return ERR_PTR(efd);
+
+	file = fget(efd);
+	sys_close(efd);
+	return file;
+}
+
+static int restore_one_epoll(cpt_object_t *obj,
+			     loff_t pos,
+			     struct cpt_epoll_image *ebuf,
+			     cpt_context_t *ctx)
+{
+	int err = 0;
+	loff_t endpos;
+	struct file *file = obj->o_obj;
+	struct eventpoll *ep;
+
+	if (file->f_op != &eventpoll_fops) {
+		eprintk_ctx("bad epoll file\n");
+		return -EINVAL;
+	}
+
+	ep = file->private_data;
+
+	if (unlikely(ep == NULL)) {
+		eprintk_ctx("bad epoll device\n");
+		return -EINVAL;
+	}
+
+	endpos = pos + ebuf->cpt_next;
+	pos += ebuf->cpt_hdrlen;
+	while (pos < endpos) {
+		struct cpt_epoll_file_image efi;
+		struct epoll_event epds;
+		
+		cpt_object_t *tobj;
+
+		err = rst_get_object(CPT_OBJ_EPOLL_FILE, pos, &efi, ctx);
+		if (err)
+			return err;
+		tobj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, efi.cpt_file, ctx);
+		if (!tobj) {
+			eprintk_ctx("epoll file not found\n");
+			return -EINVAL;
+		}
+		epds.events = efi.cpt_events;
+		epds.data = efi.cpt_data;
+		down_write(&ep->sem);
+		err = ep_insert(ep, &epds, tobj->o_obj, efi.cpt_fd);
+		if (!err) {
+			struct epitem *epi;
+			epi = ep_find(ep, tobj->o_obj, efi.cpt_fd);
+			if (epi) {
+				epi->revents = efi.cpt_revents;
+				if (efi.cpt_ready) {
+					unsigned long flags;
+					write_lock_irqsave(&ep->lock, flags);
+					if (list_empty(&epi->rdllink))
+						list_add_tail(&epi->rdllink, &ep->rdllist);
+					write_unlock_irqrestore(&ep->lock, flags);
+				}
+				ep_release_epitem(epi);
+			}
+		}
+		up_write(&ep->sem);
+		if (err)
+			break;
+		pos += efi.cpt_next;
+	}
+	return err;
+}
+
+int rst_eventpoll(cpt_context_t *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_EPOLL];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_EPOLL || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		cpt_object_t *obj;
+		struct cpt_epoll_image *ebuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_EPOLL, sec, ebuf, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, ebuf->cpt_file, ctx);
+		if (obj == NULL) {
+			eprintk_ctx("cannot find epoll file object\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		err = restore_one_epoll(obj, sec, ebuf, ctx);
+		cpt_release_buf(ctx);
+		if (err)
+			return err;
+		sec += ebuf->cpt_next;
+	}
+
+	return 0;
+	
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_files.c linux-2.6.9-ve023stab054/kernel/cpt/rst_files.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_files.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_files.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,1461 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/mount.h>
+#include <linux/tty.h>
+#include <linux/namei.h>
+#include <linux/vmalloc.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <asm/uaccess.h>
+#include <ub/ub_mem.h>
+#include <linux/cpt_image.h>
+#include <linux/namespace.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+
+#include "cpt_syscalls.h"
+
+
+struct filejob {
+	struct filejob *next;
+	int	pid;
+	loff_t	fdi;
+};
+
+static int rst_filejob_queue(loff_t pos, cpt_context_t *ctx)
+{
+	struct filejob *j;
+
+	j = kmalloc(sizeof(*j), GFP_KERNEL);
+	if (j == NULL)
+		return -ENOMEM;
+	j->pid = current->pid;
+	j->fdi = pos;
+	j->next = ctx->filejob_queue;
+	ctx->filejob_queue = j;
+	return 0;
+}
+
+/* Sorta ugly... Multiple readers/writers of named pipe rewrite buffer
+ * many times. We need to mark it in CPT_OBJ_INODE table in some way.
+ */
+static int fixup_pipe_data(struct file *file, struct cpt_file_image *fi,
+			   struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	struct cpt_inode_image ii;
+	struct cpt_obj_bits b;
+	int err;
+
+	if (!S_ISFIFO(ino->i_mode)) {
+		eprintk_ctx("fixup_pipe_data: not a pipe %Ld\n", fi->cpt_inode);
+		return -EINVAL;
+	}
+	if (fi->cpt_inode == CPT_NULL)
+		return 0;
+
+	err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx);
+	if (err)
+		return err;
+
+	if (ii.cpt_next <= ii.cpt_hdrlen)
+		return 0;
+
+	err = rst_get_object(CPT_OBJ_BITS, fi->cpt_inode + ii.cpt_hdrlen, &b, ctx);
+	if (err)
+		return err;
+
+	if (b.cpt_size == 0)
+		return 0;
+
+	if (b.cpt_size > PAGE_SIZE) {
+		eprintk_ctx("too large pipe buffer to restore with this kernel\n");
+		return -EINVAL;
+	}
+
+	err = ctx->pread(PIPE_BASE(*ino), b.cpt_size, ctx,
+			 fi->cpt_inode + ii.cpt_hdrlen + b.cpt_hdrlen);
+
+	down(PIPE_SEM(*ino));
+	PIPE_LEN(*ino) = b.cpt_size;
+	PIPE_START(*ino) = 0;
+	up(PIPE_SEM(*ino));
+
+	return err;
+}
+
+static int make_flags(struct cpt_file_image *fi)
+{
+	int flags = O_NOFOLLOW;
+	switch (fi->cpt_mode&(FMODE_READ|FMODE_WRITE)) {
+	case FMODE_READ|FMODE_WRITE:
+		flags |= O_RDWR; break;
+	case FMODE_WRITE:
+		flags |= O_WRONLY; break;
+	case FMODE_READ:
+		flags |= O_RDONLY; break;
+	default: break;
+	}
+	flags |= fi->cpt_flags&~(O_ACCMODE|O_CREAT|O_TRUNC|O_EXCL|FASYNC);
+	flags |= O_NONBLOCK|O_NOCTTY;
+	return flags;
+}
+
+static struct file *open_pipe(char *name,
+			      struct cpt_file_image *fi,
+			      unsigned flags,
+			      struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	struct cpt_inode_image ii;
+	struct file *rf, *wf;
+
+	err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx);
+	if (err)
+		return ERR_PTR(err);
+
+	if (ii.cpt_sb == FSMAGIC_PIPEFS) {
+		int pfd[2];
+
+		if ((err = sc_pipe(pfd)) < 0)
+			return ERR_PTR(err);
+
+		rf = fcheck(pfd[0]);
+		wf = fcheck(pfd[1]);
+		get_file(rf);
+		get_file(wf);
+		sc_close(pfd[0]);
+		sc_close(pfd[1]);
+
+		if (fi->cpt_mode&FMODE_READ) {
+			struct file *tf;
+			tf = wf; wf = rf; rf = tf;
+		}
+	} else {
+		if (fi->cpt_mode&FMODE_READ) {
+			rf = filp_open(name, flags, 0);
+			if (IS_ERR(rf)) {
+				dprintk_ctx("filp_open\n");
+				return rf;
+			}
+			dprintk_ctx(CPT_FID "open RDONLY fifo ino %Ld %p %x\n", CPT_TID(current), fi->cpt_inode, rf, rf->f_dentry->d_inode->i_mode);
+			return rf;
+		}
+
+		dprintk_ctx(CPT_FID "open WRONLY fifo ino %Ld\n", CPT_TID(current), fi->cpt_inode);
+
+		rf = filp_open(name, O_RDWR|O_NONBLOCK, 0);
+		if (IS_ERR(rf))
+			return rf;
+		wf = dentry_open(dget(rf->f_dentry),
+				 mntget(rf->f_vfsmnt), flags);
+	}
+
+	/* Add pipe inode to obj table. */
+	obj = cpt_object_add(CPT_OBJ_INODE, wf->f_dentry->d_inode, ctx);
+	if (obj == NULL) {
+		fput(rf); fput(wf);
+		return ERR_PTR(-ENOMEM);
+	}
+	cpt_obj_setpos(obj, fi->cpt_inode, ctx);
+	obj->o_parent = rf;
+
+	/* Add the other side of the pipe to the obj table; it will not be
+	 * used directly (o_pos = CPT_NULL), but other processes opening the
+	 * pipe will find the inode and open it with dentry_open(). */
+	obj = cpt_object_add(CPT_OBJ_FILE, rf, ctx);
+	if (obj == NULL) {
+		fput(wf);
+		return ERR_PTR(-ENOMEM);
+	}
+	return wf;
+}
+
+static struct file *open_special(struct cpt_file_image *fi,
+				 unsigned flags,
+				 int deleted,
+				 struct cpt_context *ctx)
+{
+	struct cpt_inode_image *ii;
+	struct file *file;
+
+	/* Directories and named pipes are not special actually */
+	if (S_ISDIR(fi->cpt_i_mode) || S_ISFIFO(fi->cpt_i_mode))
+		return NULL;
+
+	/* No support for block devices at the moment. */
+	if (S_ISBLK(fi->cpt_i_mode))
+		return ERR_PTR(-EINVAL);
+
+	if (S_ISSOCK(fi->cpt_i_mode)) {
+		eprintk_ctx("bug: socket is not open\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Support only (some) character devices at the moment. */
+	if (!S_ISCHR(fi->cpt_i_mode))
+		return ERR_PTR(-EINVAL);
+
+	ii = __rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, ctx);
+	if (ii == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Do not worry about this right now: /dev/null, /dev/zero and
+	 * /dev/*random fall into this class. Should we at least prohibit
+	 * /dev/mem?
+	 */
+	if (MAJOR(ii->cpt_rdev) == MEM_MAJOR) {
+		kfree(ii);
+		return NULL;
+	}
+
+	file = rst_open_tty(fi, ii, flags, ctx);
+	kfree(ii);
+	return file;
+}
+
+static int restore_posix_lock(struct file *file, struct cpt_flock_image *fli, cpt_context_t *ctx)
+{
+	struct file_lock lock;
+	cpt_object_t *obj;
+
+	memset(&lock, 0, sizeof(lock));
+	lock.fl_type = fli->cpt_type;
+	lock.fl_flags = fli->cpt_flags & ~FL_SLEEP;
+	lock.fl_start = fli->cpt_start;
+	lock.fl_end = fli->cpt_end;
+	obj = lookup_cpt_obj_byindex(CPT_OBJ_FILES, fli->cpt_owner, ctx);
+	if (!obj) {
+		eprintk_ctx("unknown lock owner %d\n", (int)fli->cpt_owner);
+		return -EINVAL;
+	}
+	lock.fl_owner = obj->o_obj;
+	lock.fl_pid = vpid_to_pid(fli->cpt_pid);
+	if (lock.fl_pid < 0) {
+		eprintk_ctx("unknown lock pid %d\n", lock.fl_pid);
+		return -EINVAL;
+	}
+	lock.fl_file = file;
+
+	if (lock.fl_owner == NULL)
+		eprintk_ctx("no lock owner\n");
+	return posix_lock_file(file, &lock);
+}
+
+static int restore_flock(struct file *file, struct cpt_flock_image *fli,
+			 cpt_context_t *ctx)
+{
+	int cmd, err, fd;
+	fd = get_unused_fd();
+	if (fd < 0) {
+		eprintk_ctx("BSD flock cannot be restored\n");
+		return fd;
+	}
+	get_file(file);
+	fd_install(fd, file);
+	if (fli->cpt_type == F_RDLCK) {
+		cmd = LOCK_SH;
+	} else if (fli->cpt_type == F_WRLCK) {
+		cmd = LOCK_EX;
+	} else {
+		eprintk_ctx("flock flavor is unknown: %u\n", fli->cpt_type);
+		sc_close(fd);
+		return -EINVAL;
+	}
+
+	err = sc_flock(fd, LOCK_NB | cmd);
+	sc_close(fd);
+	return err;
+}
+
+
+static int fixup_posix_locks(struct file *file,
+			     struct cpt_file_image *fi,
+			     loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	loff_t end;
+	struct cpt_flock_image fli;
+
+	end = pos + fi->cpt_next;
+	pos += fi->cpt_hdrlen;
+	while (pos < end) {
+		err = rst_get_object(-1, pos, &fli, ctx);
+		if (err)
+			return err;
+		if (fli.cpt_object == CPT_OBJ_FLOCK &&
+		    (fli.cpt_flags&FL_POSIX)) {
+			err = restore_posix_lock(file, &fli, ctx);
+			if (err)
+				return err;
+			dprintk_ctx("posix lock restored\n");
+		}
+		pos += fli.cpt_next;
+	}
+	return 0;
+}
+
+int rst_posix_locks(struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		struct cpt_file_image fi;
+
+		if (obj->o_pos == CPT_NULL)
+			continue;
+
+		err = rst_get_object(CPT_OBJ_FILE, obj->o_pos, &fi, ctx);
+		if (err < 0)
+			return err;
+		if (fi.cpt_next > fi.cpt_hdrlen)
+			fixup_posix_locks(file, &fi, obj->o_pos, ctx);
+	}
+	return 0;
+}
+
+static int fixup_flocks(struct file *file,
+			struct cpt_file_image *fi,
+			loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	loff_t end;
+	struct cpt_flock_image fli;
+
+	end = pos + fi->cpt_next;
+	pos += fi->cpt_hdrlen;
+	while (pos < end) {
+		err = rst_get_object(-1, pos, &fli, ctx);
+		if (err)
+			return err;
+		if (fli.cpt_object == CPT_OBJ_FLOCK &&
+		    (fli.cpt_flags&FL_FLOCK)) {
+			err = restore_flock(file, &fli, ctx);
+			if (err)
+				return err;
+			dprintk_ctx("bsd lock restored\n");
+		}
+		pos += fli.cpt_next;
+	}
+	return 0;
+}
+
+
+static int fixup_reg_data(struct file *file, loff_t pos, loff_t end,
+			  struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_page_block pgb;
+	ssize_t (*do_write)(struct file *, const char __user *, size_t, loff_t *ppos);
+
+	do_write = file->f_op->write;
+	if (do_write == NULL) {
+		eprintk_ctx("no write method. Cannot restore contents of the file.\n");
+		return -EINVAL;
+	}
+
+	atomic_inc(&file->f_count);
+
+	while (pos < end) {
+		loff_t opos;
+		loff_t ipos;
+		int count;
+
+		err = rst_get_object(CPT_OBJ_PAGES, pos, &pgb, ctx);
+		if (err)
+			goto out;
+		dprintk_ctx("restoring file data block: %08x-%08x\n",
+		       (__u32)pgb.cpt_start, (__u32)pgb.cpt_end);
+		ipos = pos + pgb.cpt_hdrlen;
+		opos = pgb.cpt_start;
+		count = pgb.cpt_end-pgb.cpt_start;
+		while (count > 0) {
+			mm_segment_t oldfs;
+			int copy = count;
+
+			if (copy > PAGE_SIZE)
+				copy = PAGE_SIZE;
+			(void)cpt_get_buf(ctx);
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			err = ctx->pread(ctx->tmpbuf, copy, ctx, ipos);
+			set_fs(oldfs);
+			if (err) {
+				__cpt_release_buf(ctx);
+				goto out;
+			}
+			if (!(file->f_mode & FMODE_WRITE)) {
+				fput(file);
+				file = dentry_open(dget(file->f_dentry),
+						   mntget(file->f_vfsmnt),
+						   O_WRONLY | O_LARGEFILE);
+				if (IS_ERR(file)) {
+					__cpt_release_buf(ctx);
+					return PTR_ERR(file);
+				}
+			}
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			ipos += copy;
+			err = do_write(file, ctx->tmpbuf, copy, &opos);
+			set_fs(oldfs);
+			__cpt_release_buf(ctx);
+			if (err != copy) {
+				if (err >= 0)
+					err = -EIO;
+				goto out;
+			}
+			count -= copy;
+		}
+		pos += pgb.cpt_next;
+	}
+	err = 0;
+
+out:
+	fput(file);
+	return err;
+}
+
+
+static int fixup_file_content(struct file **file_p, struct cpt_file_image *fi,
+			      struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_inode_image ii;
+	struct file *file = *file_p;
+	struct iattr newattrs;
+
+	if (!S_ISREG(fi->cpt_i_mode))
+		return 0;
+
+	err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx);
+	if (err)
+		return err;
+
+	if (file == NULL) {
+		file = shmem_file_setup("dev/zero", ii.cpt_size, 0);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+		*file_p = file;
+	}
+
+	if (ii.cpt_next > ii.cpt_hdrlen) {
+		err = fixup_reg_data(file, fi->cpt_inode+ii.cpt_hdrlen,
+				     fi->cpt_inode+ii.cpt_next, ctx);
+		if (err)
+			return err;
+	}
+
+	/* vzfs can not track ATTR_SIZE and ATTR_XTIME set at one time */
+	down(&file->f_dentry->d_inode->i_sem);
+	/* stage 1 - update size like do_truncate does */
+	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+	newattrs.ia_size = ii.cpt_size;
+	cpt_timespec_import(&newattrs.ia_ctime, ii.cpt_ctime);
+	err = notify_change(file->f_dentry, &newattrs);
+	if (err)
+		goto out;
+
+	/* stage 2 - update times, owner and mode */
+	newattrs.ia_valid = ATTR_MTIME | ATTR_ATIME |
+		ATTR_ATIME_SET | ATTR_MTIME_SET |
+		ATTR_MODE | ATTR_UID | ATTR_GID;
+	newattrs.ia_uid = ii.cpt_uid;
+	newattrs.ia_gid = ii.cpt_gid;
+	newattrs.ia_mode = file->f_dentry->d_inode->i_mode & S_IFMT;
+	newattrs.ia_mode |= (ii.cpt_mode & ~S_IFMT);
+	cpt_timespec_import(&newattrs.ia_atime, ii.cpt_atime);
+	cpt_timespec_import(&newattrs.ia_mtime, ii.cpt_mtime);
+	err = notify_change(file->f_dentry, &newattrs);
+
+out:
+	up(&file->f_dentry->d_inode->i_sem);
+	return err;
+}
+
+static int fixup_file_flags(struct file *file, struct cpt_file_image *fi,
+			    int was_dentry_open, loff_t pos,
+			    cpt_context_t *ctx)
+{
+	if (fi->cpt_pos != file->f_pos) {
+		int err = -ESPIPE;
+		if (file->f_op->llseek)
+			err = file->f_op->llseek(file, fi->cpt_pos, 0);
+		if (err < 0) {
+			dprintk_ctx("file %Ld lseek %Ld - %Ld\n", pos, file->f_pos, fi->cpt_pos);
+			file->f_pos = fi->cpt_pos;
+		}
+	}
+	file->f_uid = fi->cpt_uid;
+	file->f_gid = fi->cpt_gid;
+	file->f_owner.pid = 0;
+	if (fi->cpt_fown_pid) {
+		file->f_owner.pid = comb_vpid_to_pid(fi->cpt_fown_pid);
+		if (file->f_owner.pid == 0) {
+			wprintk_ctx("fixup_file_flags: owner %d does not exist anymore\n", file->f_owner.pid);
+			return -EINVAL;
+		}
+	}
+	file->f_owner.uid = fi->cpt_fown_uid;
+	file->f_owner.euid = fi->cpt_fown_euid;
+	file->f_owner.signum = fi->cpt_fown_signo;
+
+	if (file->f_mode != fi->cpt_mode) {
+		if (was_dentry_open &&
+		    ((file->f_mode^fi->cpt_mode)&(FMODE_PREAD|FMODE_LSEEK))) {
+			file->f_mode &= ~(FMODE_PREAD|FMODE_LSEEK);
+			file->f_mode |= fi->cpt_mode&(FMODE_PREAD|FMODE_LSEEK);
+		}
+		if (file->f_mode != fi->cpt_mode)
+			wprintk_ctx("file %ld mode mismatch %08x %08x\n", (long)pos, file->f_mode, fi->cpt_mode);
+	}
+	if (file->f_flags != fi->cpt_flags) {
+		if (!(fi->cpt_flags&O_NOFOLLOW))
+			file->f_flags &= ~O_NOFOLLOW;
+		if ((file->f_flags^fi->cpt_flags)&O_NONBLOCK) {
+			file->f_flags &= ~O_NONBLOCK;
+			file->f_flags |= fi->cpt_flags&O_NONBLOCK;
+		}
+		if (fi->cpt_flags&FASYNC) {
+			if (fi->cpt_fown_fd == -1) {
+				wprintk_ctx("No fd for FASYNC\n");
+				return -EINVAL;
+			} else if (file->f_op && file->f_op->fasync) {
+				if (file->f_op->fasync(fi->cpt_fown_fd, file, 1) < 0) {
+					wprintk_ctx("FASYNC problem\n");
+					return -EINVAL;
+				} else {
+					file->f_flags |= FASYNC;
+				}
+			}
+		}
+		if (file->f_flags != fi->cpt_flags) {
+			eprintk_ctx("file %ld flags mismatch %08x %08x\n", (long)pos, file->f_flags, fi->cpt_flags);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static struct file *
+open_deleted(char *name, unsigned flags, struct cpt_file_image *fi,
+	     cpt_context_t *ctx)
+{
+	struct file * file;
+	char *suffix = NULL;
+	int attempt = 0;
+	int tmp_pass = 0;
+	mode_t mode = fi->cpt_i_mode;
+
+	/* Strip (deleted) part... */
+	if (strlen(name) > strlen(" (deleted)")) {
+		if (strcmp(name + strlen(name) - strlen(" (deleted)"), " (deleted)") == 0) {
+			suffix = &name[strlen(name) - strlen(" (deleted)")];
+			*suffix = 0;
+		} else if (memcmp(name, "(deleted) ", strlen("(deleted) ")) == 0) {
+			memmove(name, name + strlen("(deleted) "), strlen(name) - strlen(" (deleted)") + 1);
+			suffix = name + strlen(name);
+		}
+	}
+
+try_again:
+	for (;;) {
+		if (attempt) {
+			if (attempt > 1000) {
+				eprintk_ctx("open_deleted: failed after %d attempts\n", attempt);
+				return ERR_PTR(-EEXIST);
+			}
+			if (suffix == NULL) {
+				eprintk_ctx("open_deleted: no suffix\n");
+				return ERR_PTR(-EEXIST);
+			}
+			sprintf(suffix, ".%08x", (unsigned)((xtime.tv_nsec>>10)+attempt));
+		}
+		attempt++;
+
+		if (S_ISFIFO(mode)) {
+			int err;
+			err = sc_mknod(name, S_IFIFO|(mode&017777), 0);
+			if (err == -EEXIST)
+				continue;
+			if (err < 0 && !tmp_pass)
+				goto change_dir;
+			if (err < 0)
+				return ERR_PTR(err);
+			file = open_pipe(name, fi, flags, ctx);
+			sc_unlink(name);
+		} else if (S_ISCHR(mode)) {
+			int err;
+			struct cpt_inode_image *ii;
+
+			ii = __rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, ctx);
+			if (ii == NULL)
+				return ERR_PTR(-ENOMEM);
+			err = sc_mknod(name, S_IFCHR|(mode&017777), new_encode_dev(ii->cpt_rdev));
+			kfree(ii);
+			if (err == -EEXIST)
+				continue;
+			if (err < 0 && !tmp_pass)
+				goto change_dir;
+			if (err < 0)
+				return ERR_PTR(err);
+			file = filp_open(name, flags, mode&017777);
+			sc_unlink(name);
+		} else if (S_ISDIR(mode)) {
+			int err;
+			err = sc_mkdir(name, mode&017777);
+			if (err == -EEXIST)
+				continue;
+			if (err < 0 && !tmp_pass)
+				goto change_dir;
+			if (err < 0)
+				return ERR_PTR(err);
+			file = filp_open(name, flags, mode&017777);
+			sc_rmdir(name);
+		} else {
+			file = filp_open(name, O_CREAT|O_EXCL|flags, mode&017777);
+			if (IS_ERR(file)) {
+				if (PTR_ERR(file) == -EEXIST)
+					continue;
+				if (!tmp_pass)
+					goto change_dir;
+			} else {
+				sc_unlink(name);
+			}
+		}
+		break;
+	}
+
+	if (IS_ERR(file)) {
+		eprintk_ctx("filp_open %s: %ld\n", name, PTR_ERR(file));
+		return file;
+	} else {
+		dprintk_ctx("deleted file created as %s, %p, %x\n", name, file, file->f_dentry->d_inode->i_mode);
+	}
+	return file;
+
+change_dir:
+	sprintf(name, "/tmp/rst%u", current->pid);
+	suffix = name + strlen(name);
+	attempt = 1;
+	tmp_pass = 1;
+	goto try_again;
+}
+
+struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx)
+{
+	int err;
+	int was_dentry_open = 0;
+	cpt_object_t *obj;
+	cpt_object_t *iobj;
+	struct cpt_file_image fi;
+	__u8 *name = NULL;
+	struct file *file;
+	int flags;
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, pos, ctx);
+	if (obj) {
+		file = obj->o_obj;
+		if (obj->o_index >= 0) {
+			dprintk_ctx("file is attached to a socket\n");
+			err = rst_get_object(CPT_OBJ_FILE, pos, &fi, ctx);
+			if (err < 0)
+				goto err_out;
+			fixup_file_flags(file, &fi, 0, pos, ctx);
+		}
+		get_file(file);
+		return file;
+	}
+
+	err = rst_get_object(CPT_OBJ_FILE, pos, &fi, ctx);
+	if (err < 0)
+		goto err_out;
+
+	flags = make_flags(&fi);
+
+	/* Easy way, inode has been already open. */
+	if (fi.cpt_inode != CPT_NULL &&
+	    !(fi.cpt_lflags & CPT_DENTRY_CLONING) &&
+	    (iobj = lookup_cpt_obj_bypos(CPT_OBJ_INODE, fi.cpt_inode, ctx)) != NULL &&
+	    iobj->o_parent) {
+		struct file *filp = iobj->o_parent;
+		file = dentry_open(dget(filp->f_dentry),
+				   mntget(filp->f_vfsmnt), flags);
+		dprintk_ctx("rst_file: file obtained by dentry_open\n");
+		was_dentry_open = 1;
+		goto map_file;
+	}
+
+	if (fi.cpt_next > fi.cpt_hdrlen)
+		name = rst_get_name(pos + sizeof(fi), ctx);
+
+	if (!name) {
+		eprintk_ctx("no name for file?\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	if (fi.cpt_lflags & CPT_DENTRY_DELETED) {
+		if (fi.cpt_inode == CPT_NULL) {
+			eprintk_ctx("deleted file and no inode.\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		/* One very special case... */
+		if (S_ISREG(fi.cpt_i_mode) &&
+		    (!name[0] || strcmp(name, "/dev/zero (deleted)") == 0)) {
+			/* MAP_ANON|MAP_SHARED mapping.
+			 * The kernel handles this in an ugly way: the file
+			 * passed to mmap() by the user does not match the
+			 * file finally attached to the VMA. Ok, rst_mm
+			 * has to take care of this. Otherwise, it will fail.
+			 */
+			file = NULL;
+		} else if (S_ISREG(fi.cpt_i_mode) ||
+			   S_ISCHR(fi.cpt_i_mode) ||
+			   S_ISFIFO(fi.cpt_i_mode) ||
+			   S_ISDIR(fi.cpt_i_mode)) {
+			if (S_ISCHR(fi.cpt_i_mode)) {
+				file = open_special(&fi, flags, 1, ctx);
+				if (file != NULL)
+					goto map_file;
+			}
+			file = open_deleted(name, flags, &fi, ctx);
+			if (IS_ERR(file))
+				goto out;
+		} else {
+			eprintk_ctx("not a regular deleted file.\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		err = fixup_file_content(&file, &fi, ctx);
+		if (err)
+			goto err_put;
+		goto map_file;
+	} else {
+		if (!name[0]) {
+			eprintk_ctx("empty name for file?\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+		if ((fi.cpt_lflags & CPT_DENTRY_EPOLL) &&
+		    (file = cpt_open_epolldev(&fi, flags, ctx)) != NULL)
+			goto map_file;
+		if (S_ISFIFO(fi.cpt_i_mode) &&
+		    (file = open_pipe(name, &fi, flags, ctx)) != NULL)
+			goto map_file;
+		if (!S_ISREG(fi.cpt_i_mode) &&
+		    (file = open_special(&fi, flags, 0, ctx)) != NULL)
+			goto map_file;
+	}
+
+	file = filp_open(name, flags, 0);
+
+map_file:
+	if (!IS_ERR(file)) {
+		fixup_file_flags(file, &fi, was_dentry_open, pos, ctx);
+
+		if (S_ISFIFO(fi.cpt_i_mode) && !was_dentry_open) {
+			err = fixup_pipe_data(file, &fi, ctx);
+			if (err)
+				goto err_put;
+		}
+
+		/* This is very special hack. Logically, cwd/root are
+		 * nothing but open directories. Nevertheless, this causes
+		 * failures of restores, when number of open files in VE
+		 * is close to limit. So, if it is rst_file() of cwd/root
+		 * (fd = -2) and the directory is not deleted, we skip
+		 * adding files to object table. If the directory is
+		 * not unlinked, this cannot cause any problems.
+		 */
+		if (fd != -2 ||
+		    !S_ISDIR(file->f_dentry->d_inode->i_mode) ||
+		    (fi.cpt_lflags & CPT_DENTRY_DELETED)) {
+			obj = cpt_object_get(CPT_OBJ_FILE, file, ctx);
+			if (!obj) {
+				obj = cpt_object_add(CPT_OBJ_FILE, file, ctx);
+				if (obj)
+					get_file(file);
+			}
+			if (obj)
+				cpt_obj_setpos(obj, pos, ctx);
+
+			obj = cpt_object_add(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
+			if (obj) {
+				cpt_obj_setpos(obj, fi.cpt_inode, ctx);
+				if (!obj->o_parent || !(fi.cpt_lflags & CPT_DENTRY_DELETED))
+					obj->o_parent = file;
+			}
+		}
+
+		if (fi.cpt_next > fi.cpt_hdrlen) {
+			err = fixup_flocks(file, &fi, pos, ctx);
+			if (err)
+				goto err_put;
+		}
+	} else {
+		if (fi.cpt_lflags & CPT_DENTRY_PROC) {
+			dprintk_ctx("rst_file /proc delayed\n");
+			file = NULL;
+		} else if (name)
+			eprintk_ctx("can't open file %s\n", name);
+	}
+
+out:
+	if (name)
+		rst_put_name(name, ctx);
+	return file;
+
+err_put:
+	if (file)
+		fput(file);
+err_out:
+	if (name)
+		rst_put_name(name, ctx);
+	return ERR_PTR(err);
+}
+
+
+__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	__u32 flag = 0;
+
+	if (ti->cpt_files == CPT_NULL ||
+	    lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx))
+		flag |= CLONE_FILES;
+	if (ti->cpt_fs == CPT_NULL ||
+	    lookup_cpt_obj_bypos(CPT_OBJ_FS, ti->cpt_fs, ctx))
+		flag |= CLONE_FS;
+	return flag;
+}
+
+static void local_close_files(struct files_struct * files)
+{
+	int i, j;
+
+	j = 0;
+	for (;;) {
+		unsigned long set;
+		i = j * __NFDBITS;
+		if (i >= files->max_fdset || i >= files->max_fds)
+			break;
+		set = files->open_fds->fds_bits[j];
+		while (set) {
+			if (set & 1) {
+				struct file * file = xchg(&files->fd[i], NULL);
+				if (file)
+					filp_close(file, files);
+			}
+			i++;
+			set >>= 1;
+		}
+		files->open_fds->fds_bits[j] = 0;
+		files->close_on_exec->fds_bits[j] = 0;
+		j++;
+	}
+}
+
+
+int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct cpt_files_struct_image fi;
+	struct files_struct *f = current->files;
+	cpt_object_t *obj;
+	loff_t pos, endpos;
+	int err;
+
+	if (ti->cpt_files == CPT_NULL) {
+		current->files = NULL;
+		if (f)
+			put_files_struct(f);
+		return 0;
+	}
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx);
+	if (obj) {
+		if (obj->o_obj != f) {
+			put_files_struct(f);
+			f = obj->o_obj;
+			atomic_inc(&f->count);
+			current->files = f;
+		}
+		return 0;
+	}
+
+	err = rst_get_object(CPT_OBJ_FILES, ti->cpt_files, &fi, ctx);
+	if (err)
+		return err;
+
+	local_close_files(f);
+
+	if (fi.cpt_max_fds > f->max_fdset) {
+		spin_lock(&f->file_lock);
+		err = expand_fdset(f, fi.cpt_max_fds-1);
+		spin_unlock(&f->file_lock);
+		if (err)
+			return err;
+	}
+	if (fi.cpt_max_fds > f->max_fds) {
+		spin_lock(&f->file_lock);
+		err = expand_fd_array(f, fi.cpt_max_fds-1);
+		spin_unlock(&f->file_lock);
+		if (err)
+			return err;
+	}
+
+	pos = ti->cpt_files + fi.cpt_hdrlen;
+	endpos = ti->cpt_files + fi.cpt_next;
+	while (pos < endpos) {
+		struct cpt_fd_image fdi;
+		struct file *filp;
+
+		err = rst_get_object(CPT_OBJ_FILEDESC, pos, &fdi, ctx);
+		if (err)
+			return err;
+		filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx);
+		if (IS_ERR(filp)) {
+			eprintk_ctx("rst_file: %ld %Lu\n", PTR_ERR(filp), fdi.cpt_file);
+			return PTR_ERR(filp);
+		}
+		if (filp == NULL) {
+			int err = rst_filejob_queue(pos, ctx);
+			if (err)
+				return err;
+		} else {
+			if (fdi.cpt_fd >= f->max_fds) BUG();
+			f->fd[fdi.cpt_fd] = filp;
+			FD_SET(fdi.cpt_fd, f->open_fds);
+			if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC)
+				FD_SET(fdi.cpt_fd, f->close_on_exec);
+		}
+		pos += fdi.cpt_next;
+	}
+	f->next_fd = fi.cpt_next_fd;
+
+	obj = cpt_object_add(CPT_OBJ_FILES, f, ctx);
+	if (obj) {
+		cpt_obj_setpos(obj, ti->cpt_files, ctx);
+		cpt_obj_setindex(obj, fi.cpt_index, ctx);
+	}
+	return 0;
+}
+
+int rst_do_filejobs(cpt_context_t *ctx)
+{
+	struct filejob *j;
+
+	while ((j = ctx->filejob_queue) != NULL) {
+		int err;
+		task_t *tsk;
+		struct cpt_fd_image fdi;
+		struct file *filp;
+
+		read_lock(&tasklist_lock);
+		tsk = find_task_by_pid_ve(j->pid);
+		if (tsk)
+			get_task_struct(tsk);
+		read_unlock(&tasklist_lock);
+		if (!tsk)
+			return -EINVAL;
+
+		err = rst_get_object(CPT_OBJ_FILEDESC, j->fdi, &fdi, ctx);
+		if (err) {
+			put_task_struct(tsk);
+			return err;
+		}
+
+		if (fdi.cpt_fd >= tsk->files->max_fds) BUG();
+		if (tsk->files->fd[fdi.cpt_fd] ||
+		    FD_ISSET(fdi.cpt_fd, tsk->files->open_fds)) {
+			eprintk_ctx("doing filejob %Ld: fd is busy\n", j->fdi);
+			put_task_struct(tsk);
+			return -EBUSY;
+		}
+
+		filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx);
+		if (IS_ERR(filp)) {
+			eprintk_ctx("rst_do_filejobs: 1: %ld %Lu\n", PTR_ERR(filp), fdi.cpt_file);
+			put_task_struct(tsk);
+			return PTR_ERR(filp);
+		}
+		if (fdi.cpt_fd >= tsk->files->max_fds) BUG();
+		tsk->files->fd[fdi.cpt_fd] = filp;
+		FD_SET(fdi.cpt_fd, tsk->files->open_fds);
+		if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC)
+			FD_SET(fdi.cpt_fd, tsk->files->close_on_exec);
+
+		dprintk_ctx("filejob %Ld done\n", j->fdi);
+
+		put_task_struct(tsk);
+		ctx->filejob_queue = j->next;
+		kfree(j);
+	}
+	return 0;
+}
+
+void rst_flush_filejobs(cpt_context_t *ctx)
+{
+	struct filejob *j;
+
+	while ((j = ctx->filejob_queue) != NULL) {
+		ctx->filejob_queue = j->next;
+		kfree(j);
+	}
+}
+
+int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct fs_struct *f = current->fs;
+	cpt_object_t *obj;
+
+	if (ti->cpt_fs == CPT_NULL) {
+		exit_fs(current);
+		return 0;
+	}
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FS, ti->cpt_fs, ctx);
+	if (obj) {
+		if (obj->o_obj != f) {
+			exit_fs(current);
+			f = obj->o_obj;
+			atomic_inc(&f->count);
+			current->fs = f;
+		}
+		return 0;
+	}
+
+	/* Do _not_ restore root. Image contains absolute pathnames.
+	 * So, we fix it in context of rst process.
+	 */
+
+	obj = cpt_object_add(CPT_OBJ_FS, f, ctx);
+	if (obj)
+		cpt_obj_setpos(obj, ti->cpt_fs, ctx);
+
+	return 0;
+}
+
+static int get_dir(struct dentry **dp, struct vfsmount **mp,
+		   loff_t *pos, struct cpt_context *ctx)
+{
+	struct cpt_file_image fi;
+	struct file * file;
+	int err;
+
+	err = rst_get_object(CPT_OBJ_FILE, *pos, &fi, ctx);
+	if (err)
+		return err;
+
+	file = rst_file(*pos, -2, ctx);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	*dp = dget(file->f_dentry);
+	*mp = mntget(file->f_vfsmnt);
+	*pos += fi.cpt_next;
+	fput(file);
+	return 0;
+}
+
+static void __set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
+			  struct dentry *dentry)
+{
+	struct dentry *old_root;
+	struct vfsmount *old_rootmnt;
+	write_lock(&fs->lock);
+	old_root = fs->root;
+	old_rootmnt = fs->rootmnt;
+	fs->rootmnt = mnt;
+	fs->root = dentry;
+	write_unlock(&fs->lock);
+	if (old_root) {
+		dput(old_root);
+		mntput(old_rootmnt);
+	}
+}
+
+static void __set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
+			 struct dentry *dentry)
+{
+	struct dentry *old_pwd;
+	struct vfsmount *old_pwdmnt;
+
+	write_lock(&fs->lock);
+	old_pwd = fs->pwd;
+	old_pwdmnt = fs->pwdmnt;
+	fs->pwdmnt = mnt;
+	fs->pwd = dentry;
+	write_unlock(&fs->lock);
+
+	if (old_pwd) {
+		dput(old_pwd);
+		mntput(old_pwdmnt);
+	}
+}
+
+
+int rst_restore_fs(struct cpt_context *ctx)
+{
+	loff_t pos;
+	cpt_object_t *obj;
+	int err = 0;
+
+	for_each_object(obj, CPT_OBJ_FS) {
+		struct cpt_fs_struct_image fi;
+		struct fs_struct *fs = obj->o_obj;
+		int i;
+		struct dentry *d[3];
+		struct vfsmount *m[3];
+
+		err = rst_get_object(CPT_OBJ_FS, obj->o_pos, &fi, ctx);
+		if (err)
+			return err;
+
+		fs->umask = fi.cpt_umask;
+
+		pos = obj->o_pos + fi.cpt_hdrlen;
+		d[0] = d[1] = d[2] = NULL;
+		m[0] = m[1] = m[2] = NULL;
+		i = 0;
+		while (pos < obj->o_pos + fi.cpt_next && i<3) {
+			err = get_dir(d+i, m+i, &pos, ctx);
+			if (err) {
+				eprintk_ctx("cannot get_dir: %d", err);
+				for (--i; i >= 0; i--) {
+					if (d[i])
+						dput(d[i]);
+					if (m[i])
+						mntput(m[i]);
+				}
+				return err;
+			}
+			i++;
+		}
+		if (d[0])
+			__set_fs_root(fs, m[0], d[0]);
+		if (d[1])
+			__set_fs_pwd(fs, m[1], d[1]);
+		if (d[2]) {
+			struct dentry *olddentry;
+			struct vfsmount *oldmnt;
+			write_lock(&fs->lock);
+			oldmnt = fs->altrootmnt;
+			olddentry = fs->altroot;
+			fs->altrootmnt = m[2];
+			fs->altroot = d[2];
+			write_unlock(&fs->lock);
+
+			if (olddentry) {
+				dput(olddentry);
+				mntput(oldmnt);
+			}
+		}
+	}
+	return err;
+}
+
+int do_one_mount(char *mntpnt, char *mnttype, char *mntbind,
+		 unsigned long flags, unsigned long mnt_flags,
+		 struct cpt_context *ctx)
+{
+	int err;
+
+	if (mntbind && (strcmp(mntbind, "/") == 0 || strcmp(mntbind, "") == 0))
+		mntbind = NULL;
+
+	if (mntbind)
+		flags |= MS_BIND;
+	/* Join per-mountpoint flags with global flags */
+	if (mnt_flags & MNT_NOSUID)
+		flags |= MS_NOSUID;
+	if (mnt_flags & MNT_NODEV)
+		flags |= MS_NODEV;
+	if (mnt_flags & MNT_NOEXEC)
+		flags |= MS_NOEXEC;
+
+	err = sc_mount(mntbind, mntpnt, mnttype, flags);
+	if (err < 0) {
+		eprintk_ctx("%d mounting %s %s %08lx\n", err, mntpnt, mnttype, flags);
+		return err;
+	}
+	return 0;
+}
+
+static int undumptmpfs(void *arg)
+{
+	int i;
+	int *pfd = arg;
+	char *argv[] = { "tar", "x", "-C", "/", "-S", NULL };
+
+	if (pfd[0] != 0)
+		sc_dup2(pfd[0], 0);
+
+	for (i=1; i<current->files->max_fds; i++)
+		sc_close(i);
+
+	module_put(THIS_MODULE);
+
+	set_fs(KERNEL_DS);
+	i = sc_execve("/bin/tar", argv, NULL);
+	eprintk("failed to exec /bin/tar: %d\n", i);
+	return -1;
+}
+
+static int rst_restore_tmpfs(loff_t *pos, struct cpt_context * ctx)
+{
+	int err;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	int n;
+	loff_t end;
+	int pid;
+
+	err = rst_get_object(CPT_OBJ_NAME, *pos, &v, ctx);
+	if (err < 0)
+		return err;
+
+	err = sc_pipe(pfd);
+	if (err < 0)
+		return err;
+	pid = err = local_kernel_thread(undumptmpfs, (void*)pfd, SIGCHLD, 0);
+	if (err < 0)
+		goto out;
+	f = fget(pfd[1]);
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	ctx->file->f_pos = *pos + v.cpt_hdrlen;
+	end = *pos + v.cpt_next;
+	*pos += v.cpt_next;
+	do {
+		char buf[16];
+		mm_segment_t oldfs;
+
+		n = end - ctx->file->f_pos;
+		if (n > sizeof(buf))
+			n = sizeof(buf);
+
+		if (ctx->read(buf, n, ctx))
+			break;
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		f->f_op->write(f, buf, n, &f->f_pos);
+		set_fs(oldfs);
+	} while (ctx->file->f_pos < end);
+
+	fput(f);
+
+	clear_tsk_thread_flag(current,TIF_SIGPENDING);
+
+	if ((err = sc_waitx(pid, 0)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+
+	return 0;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	return err;
+}
+
+int check_ext_mount(char *mntpnt, char *mnttype, struct cpt_context *ctx)
+{
+	struct namespace *n = current->namespace;
+	struct list_head *p;
+	struct vfsmount *t;
+	char *path, *path_buf;
+	int ret;
+
+	ret = -ENOENT;
+	path_buf = cpt_get_buf(ctx);
+	down_read(&n->sem);
+	list_for_each(p, &n->list) {
+		t = list_entry(p, struct vfsmount, mnt_list);
+		path = d_path(t->mnt_root, t, path_buf, PAGE_SIZE);
+		if (IS_ERR(path))
+			continue;
+		if (!strcmp(path, mntpnt) &&
+		    !strcmp(t->mnt_sb->s_type->name, mnttype)) {
+			ret = 0;
+			break;
+		}
+	}
+	up_read(&n->sem);
+	__cpt_release_buf(ctx);
+	return ret;
+}
+
+int restore_one_vfsmount(struct cpt_vfsmount_image *mi, loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	loff_t endpos;
+
+	endpos = pos + mi->cpt_next;
+	pos += mi->cpt_hdrlen;
+
+	while (pos < endpos) {
+		char *mntdev;
+		char *mntpnt;
+		char *mnttype;
+		char *mntbind;
+
+		mntdev = __rst_get_name(&pos, ctx);
+		mntpnt = __rst_get_name(&pos, ctx);
+		mnttype = __rst_get_name(&pos, ctx);
+		mntbind = NULL;
+		if (mi->cpt_mntflags & CPT_MNT_BIND)
+			mntbind = __rst_get_name(&pos, ctx);
+		err = -EINVAL;
+		if (mnttype && mntpnt) {
+			err = 0;
+			if (!(mi->cpt_mntflags & CPT_MNT_EXT) &&
+			    strcmp(mntpnt, "/")) {
+				err = do_one_mount(mntpnt, mnttype, mntbind,
+						   mi->cpt_flags,
+						   mi->cpt_mntflags, ctx);
+				if (!err &&
+				    strcmp(mnttype, "tmpfs") == 0 &&
+				    !(mi->cpt_mntflags & (CPT_MNT_BIND)))
+					    rst_restore_tmpfs(&pos, ctx);
+			} else if (mi->cpt_mntflags & CPT_MNT_EXT) {
+				err = check_ext_mount(mntpnt, mnttype, ctx);
+				if (err)
+					eprintk_ctx("mount point is missing: %s\n", mntpnt);
+			}
+		}
+		if (mntdev)
+			rst_put_name(mntdev, ctx);
+		if (mntpnt)
+			rst_put_name(mntpnt, ctx);
+		if (mnttype)
+			rst_put_name(mnttype, ctx);
+		if (mntbind)
+			rst_put_name(mntbind, ctx);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+int restore_one_namespace(loff_t pos, loff_t endpos, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_vfsmount_image mi;
+
+	while (pos < endpos) {
+		err = rst_get_object(CPT_OBJ_VFSMOUNT, pos, &mi, ctx);
+		if (err)
+			return err;
+		err = restore_one_vfsmount(&mi, pos, ctx);
+		if (err)
+			return err;
+		pos += mi.cpt_next;
+	}
+	return 0;
+}
+
+int rst_root_namespace(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_NAMESPACE];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_object_hdr sbuf;
+	int done = 0;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NAMESPACE || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		err = rst_get_object(CPT_OBJ_NAMESPACE, sec, &sbuf, ctx);
+		if (err)
+			return err;
+		if (done) {
+			eprintk_ctx("multiple namespaces are not supported\n");
+			break;
+		}
+		done++;
+		err = restore_one_namespace(sec+sbuf.cpt_hdrlen, sec+sbuf.cpt_next, ctx);
+		if (err)
+			return err;
+		sec += sbuf.cpt_next;
+	}
+
+	return 0;
+}
+
+int rst_stray_files(struct cpt_context *ctx)
+{
+	int err = 0;
+	loff_t sec = ctx->sections[CPT_SECT_FILES];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_FILES || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		struct cpt_object_hdr sbuf;
+		cpt_object_t *obj;
+
+		err = _rst_get_object(CPT_OBJ_FILE, sec, &sbuf, sizeof(sbuf), ctx);
+		if (err)
+			break;
+
+		obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, sec, ctx);
+		if (!obj) {
+			struct file *file;
+
+			dprintk_ctx("stray file %Ld\n", sec);
+
+			file = rst_sysv_shm(sec, ctx);
+
+			if (IS_ERR(file)) {
+				eprintk_ctx("rst_stray_files: %ld\n", PTR_ERR(file));
+				return PTR_ERR(file);
+			} else {
+				fput(file);
+			}
+		}
+		sec += sbuf.cpt_next;
+	}
+
+	return err;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_i386.S linux-2.6.9-ve023stab054/kernel/cpt/rst_i386.S
--- linux-2.6.9-100.orig/kernel/cpt/rst_i386.S	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_i386.S	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,41 @@
+#define ASSEMBLY 1
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+#include <asm/asm_offsets.h>
+#include <asm/errno.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+
+	.section .text
+	.align 4
+	.global ret_last_siginfo
+ret_last_siginfo:
+	call rlsi
+	movl %eax,%esp
+	ret
+
+	.align 8
+	.global ret_child_tid
+ret_child_tid:
+	push %esp
+	call rct
+	movl %eax,%esp
+	ret
+
+	.align 4
+	.global ret_from_rst
+ret_from_rst:
+	pushl %eax
+	jmp ret_from_fork+6
+
+	.align 4
+	.global pre_ret_from_fork
+pre_ret_from_fork:
+	pushl %eax
+	call schedule_tail
+	popl %eax
+	ret
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_iterative.c linux-2.6.9-ve023stab054/kernel/cpt/rst_iterative.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_iterative.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_iterative.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,469 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/errno.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/uio.h>
+#include <asm/ldt.h>
+#include <asm/mmu.h>
+#include <asm/tlb.h>
+#include <linux/swapops.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_pagein.h"
+
+/* TODO:
+ * 1. Error handling and recovery
+ */
+
+struct swp_node
+{
+	swp_entry_t		ent;
+	struct anon_vma		*anon;
+};
+
+#define PFN_PER_PAGE (PAGE_SIZE/sizeof(struct swp_node))
+
+#define ITERDIR_SIZE (16*PAGE_SIZE)
+
+struct swp_node * lookup_pfn(u64 pfn, cpt_context_t *ctx)
+{
+	struct swp_node **dir = ctx->iter_dir;
+	struct swp_node *pd;
+
+	if (pfn/PFN_PER_PAGE >= ITERDIR_SIZE/sizeof(void*))
+		return NULL;
+
+	if (dir == NULL)
+		return NULL;
+
+	pd = dir[pfn/PFN_PER_PAGE];
+	if (pd == NULL)
+		return NULL;
+
+	pd += pfn%PFN_PER_PAGE;
+	return pd->ent.val ? pd : NULL;
+}
+
+int alloc_pfn(u64 pfn, swp_entry_t ent, cpt_context_t *ctx)
+{
+	struct swp_node **dir = ctx->iter_dir;
+
+	if (pfn/PFN_PER_PAGE >= ITERDIR_SIZE/sizeof(void*))
+		return -EINVAL;
+
+	if (dir == NULL) {
+		ctx->iter_dir = vmalloc(ITERDIR_SIZE);
+		if (ctx->iter_dir == NULL)
+			return -ENOMEM;
+		memset(ctx->iter_dir, 0, ITERDIR_SIZE);
+		dir = ctx->iter_dir;
+	}
+
+	if (unlikely(dir[pfn/PFN_PER_PAGE] == NULL)) {
+		dir[pfn/PFN_PER_PAGE] = (void*)__get_free_page(GFP_KERNEL);
+		if (dir[pfn/PFN_PER_PAGE] == NULL)
+			return -ENOMEM;
+		memset(dir[pfn/PFN_PER_PAGE], 0, PAGE_SIZE);
+	}
+
+	dir[pfn/PFN_PER_PAGE][pfn%PFN_PER_PAGE].ent = ent;
+	dir[pfn/PFN_PER_PAGE][pfn%PFN_PER_PAGE].anon = NULL;
+	return 0;
+}
+
+static int iter_clone(struct mm_struct * mm,
+		      unsigned long addr,
+		      struct page *src_page,
+		      cpt_context_t * ctx)
+{
+	int err;
+	struct page *page;
+	void *dst, *src;
+
+	err = get_user_pages(current, mm, addr,
+			     1, 1, 1, &page, NULL);
+	if (err == 0)
+		err = -EFAULT;
+	if (err < 0) {
+		eprintk_ctx("iter_clone: get_user_pages: %d\n", err);
+		return err;
+	}
+
+	dst = kmap(page);
+	src = kmap(src_page);
+	memcpy(dst, src, PAGE_SIZE);
+	kunmap(src_page);
+	kunmap(page);
+
+	page_cache_release(page);
+	return 0;
+}
+
+int rst_iter(struct vm_area_struct *vma, u64 pfn,
+	     unsigned long addr, cpt_context_t * ctx)
+{
+	int err = -EFAULT;
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	struct swp_node *swn;
+
+	swn = lookup_pfn(pfn, ctx);
+	if (swn == NULL) {
+		eprintk_ctx("rst_iter: missing pfn\n");
+		return -EINVAL;
+	}
+
+	if (swn->anon && swn->anon != vma->anon_vma) {
+		struct page * page;
+		spin_unlock(&mm->page_table_lock);
+		err = -ENOMEM;
+		page = read_swap_cache_async(swn->ent, vma, addr);
+		if (page) {
+			err = -EIO;
+			wait_on_page_locked(page);
+			if (PageUptodate(page))
+				err = iter_clone(mm, addr, page, ctx);
+			page_cache_release(page);
+		}
+		spin_lock(&mm->page_table_lock);
+		wprintk("cloning iter page due to anon vma mismatch %d\n", err);
+		return err;
+	}
+
+	pgd = pgd_offset(mm, addr);
+	if (unlikely(pgd_bad(*pgd)))
+		return -EINVAL;
+
+	pmd = pmd_alloc(mm, pgd, addr);
+	if (unlikely(!pmd))
+		return -ENOMEM;
+
+	pte = pte_alloc_map(mm, pmd, addr);
+	if (unlikely(!pte))
+		return -ENOMEM;
+
+	if (pte_none(*pte)) {
+		if (!swap_duplicate(swn->ent))
+			BUG();
+		set_pte(pte, swp_entry_to_pte(swn->ent));
+		swn->anon = vma->anon_vma;
+		err = 0;
+	} else {
+		eprintk_ctx("rst_iter for populated pte: 0x%lx %Lx\n", addr, pfn);
+	}
+	pte_unmap(pte);
+
+	return err;
+}
+
+static int nread(struct file *file, char *buf, int len)
+{
+	int offset = 0;
+
+	while (offset < len) {
+		int res;
+		mm_segment_t oldfs;
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		res = vfs_read(file, buf+offset, len-offset, &file->f_pos);
+		set_fs(oldfs);
+		if (res < 0)
+			return res;
+		if (res == 0)
+			return -EIO;
+		offset += res;
+	}
+	return 0;
+}
+
+
+static struct page *dontread_swap_cache(swp_entry_t entry, struct file *file)
+{
+	struct page *found_page, *new_page = NULL;
+	int err = 0;
+	void *dst;
+
+	do {
+		/*
+		 * First check the swap cache.  Since this is normally
+		 * called after lookup_swap_cache() failed, re-calling
+		 * that would confuse statistics.
+		 */
+		spin_lock_irq(&swapper_space.tree_lock);
+		found_page = radix_tree_lookup(&swapper_space.page_tree,
+						entry.val);
+		if (found_page)
+			page_cache_get(found_page);
+		spin_unlock_irq(&swapper_space.tree_lock);
+		if (found_page)
+			break;
+
+		/*
+		 * Get a new page to read into from swap.
+		 */
+		if (!new_page) {
+			new_page = alloc_page(GFP_HIGHUSER);
+			if (!new_page)
+				break;		/* Out of memory */
+		}
+
+		err = add_to_swap_cache(new_page, entry);
+		if (!err) {
+			lru_cache_add_active(new_page);
+			goto dirty_page;
+		}
+	} while (err != -ENOENT && err != -ENOMEM);
+
+	if (new_page)
+		page_cache_release(new_page);
+	if (found_page) {
+		lock_page(found_page);
+		new_page = found_page;
+		goto dirty_page;
+	}
+	return NULL;
+
+dirty_page:
+	dst = kmap(new_page);
+	err = nread(file, dst, PAGE_SIZE);
+	kunmap(new_page);
+	SetPageDirty(new_page);
+	SetPageUptodate(new_page);
+	unlock_page(new_page);
+	if (err) {
+		page_cache_release(new_page);
+		return NULL;
+	}
+	return new_page;
+}
+
+extern int __add_to_swap_cache(struct page *page,
+			       swp_entry_t entry, int gfp_mask);
+
+
+static int add_to_swap_ub(struct page * page, struct user_beancounter *ub)
+{
+	swp_entry_t entry;
+	int pf_flags;
+	int err;
+
+	if (!PageLocked(page))
+		BUG();
+
+	for (;;) {
+		entry = get_swap_page(ub);
+		if (!entry.val)
+			return 0;
+
+		/* Radix-tree node allocations are performed as
+		 * GFP_ATOMIC allocations under PF_MEMALLOC.
+		 * They can completely exhaust the page allocator.
+		 *
+		 * So PF_MEMALLOC is dropped here.  This causes the slab
+		 * allocations to fail earlier, so radix-tree nodes will
+		 * then be allocated from the mempool reserves.
+		 *
+		 * We're still using __GFP_HIGH for radix-tree node
+		 * allocations, so some of the emergency pools are available,
+		 * just not all of them.
+		 */
+
+		pf_flags = current->flags;
+		current->flags &= ~PF_MEMALLOC;
+
+		/*
+		 * Add it to the swap cache and mark it dirty
+		 */
+		err = __add_to_swap_cache(page, entry, GFP_ATOMIC);
+
+		if (pf_flags & PF_MEMALLOC)
+			current->flags |= PF_MEMALLOC;
+
+		switch (err) {
+		case 0:				/* Success */
+			SetPageUptodate(page);
+			SetPageDirty(page);
+			return 1;
+		case -EEXIST:
+			/* Raced with "speculative" read_swap_cache_async */
+			swap_free(entry);
+			continue;
+		default:
+			/* -ENOMEM radix-tree allocation failure */
+			swap_free(entry);
+			return 0;
+		}
+	}
+}
+
+
+int rst_iteration(cpt_context_t *ctx)
+{
+	int err = 0;
+	struct file * file = ctx->pagein_file_in;
+	mm_segment_t oldfs;
+	struct user_beancounter *ub;
+
+	if (file == NULL || ctx->pagein_file_out == NULL)
+		return -EBADF;
+
+	ub = ctx->iter_ub;
+	if (ub == NULL) {
+		if (ctx->ve_id == 0) {
+			ub = get_beancounter(mm_ub(&init_mm));
+		} else {
+			ub = get_beancounter_byuid(ctx->ve_id, 1);
+			err = -ENOMEM;
+			if (ub == NULL)
+				goto out;
+		}
+		ctx->iter_ub = ub;
+	}
+	get_beancounter(ub);
+
+	for (;;) {
+		struct swp_node * swn;
+		swp_entry_t ent;
+		void *dst;
+		struct page * page;
+		struct pgin_reply rep;
+
+		err = nread(file, (void*)&rep, sizeof(rep));
+		if (err)
+			break;
+
+		if (rep.rmid != PGIN_RMID) {
+			err = -EINVAL;
+			eprintk_ctx("iter stream corrupt\n");
+			break;
+		}
+
+		if (rep.handle == 0) {
+			switch (rep.error) {
+			case ITER_PASS:
+				continue;
+			case ITER_STOP:
+				break;
+			default:
+				eprintk_ctx("iter stream corrupt: unknown control code %d\n", rep.error);
+				err = -EINVAL;
+			}
+			break;
+		}
+
+		err = -ENOMEM;
+
+		swn = lookup_pfn(rep.handle, ctx);
+		if (swn) {
+			page = dontread_swap_cache(swn->ent, file);
+			if (page == NULL)
+				break;
+			page_cache_release(page);
+			continue;
+		}
+
+		if (nr_swap_pages*4 < total_swap_pages)
+			break;
+
+		page = alloc_page(GFP_HIGHUSER);
+		if (page == NULL)
+			break;
+
+		dst = kmap(page);
+		err = nread(file, dst, PAGE_SIZE);
+		kunmap(page);
+
+		if (err) {
+			page_cache_release(page);
+			break;
+		}
+
+		ent.val = 0;
+		lock_page(page);
+		if (add_to_swap_ub(page, ub))
+			ent.val = page->private;
+		unlock_page(page);
+		page_cache_release(page);
+		err = -ENOMEM;
+		if (ent.val == 0)
+			break;
+
+		err = alloc_pfn(rep.handle, ent, ctx);
+		if (err)
+			break;
+
+		swap_duplicate(ent);
+	}
+	put_beancounter(ub);
+
+out:
+	if (!err) {
+		struct pgin_request req;
+		req.rmid = PGIN_RMID;
+		req.size = PGIN_STOP;
+		req.index = 0;
+		req.handle = 0;
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = vfs_write(ctx->pagein_file_out, (void*)&req, sizeof(req),
+				&ctx->pagein_file_out->f_pos);
+		set_fs(oldfs);
+		if (err != sizeof(req)) {
+			if (err >= 0)
+				err = -EIO;
+		} else {
+			err = 0;
+		}
+	}
+	if (err) {
+		fput(ctx->pagein_file_out);
+		ctx->pagein_file_out = NULL;
+		fput(ctx->pagein_file_in);
+		ctx->pagein_file_in = NULL;
+		rst_drop_iter_dir(ctx);
+	}
+	return err;
+}
+
+void rst_drop_iter_dir(cpt_context_t *ctx)
+{
+	int i, k;
+
+	if (ctx->iter_dir == NULL)
+		goto free_ub;
+
+	for (i=0; i<ITERDIR_SIZE/sizeof(void*); i++) {
+		struct swp_node **dir = ctx->iter_dir;
+		if (dir[i]) {
+			for (k=0; k<PFN_PER_PAGE; k++) {
+				swp_entry_t ent = dir[i][k].ent;
+				if (ent.val)
+					free_swap_and_cache(ent);
+			}
+			free_page((unsigned long)dir[i]);
+		}
+	}
+
+	vfree(ctx->iter_dir);
+	ctx->iter_dir = NULL;
+
+free_ub:
+	if (ctx->iter_ub) {
+		put_beancounter(ctx->iter_ub);
+		ctx->iter_ub = NULL;
+	}
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_mm.c linux-2.6.9-ve023stab054/kernel/cpt/rst_mm.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_mm.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_mm.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,1342 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+#include <linux/hugetlb.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+#include <linux/vmalloc.h>
+#include <linux/rmap.h>
+#include <linux/hash.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+#include <asm/mmu_context.h>
+#include <linux/swapops.h>
+#include <linux/cpt_image.h>
+
+#ifdef CONFIG_VE
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+#endif
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_ubc.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+#include "cpt_pagein.h"
+
+#include "cpt_syscalls.h"
+
+#define __PAGE_NX (1ULL<<63)
+
+static unsigned long make_prot(struct cpt_vma_image *vmai)
+{
+	unsigned long prot = 0;
+
+	if (vmai->cpt_flags&VM_READ)
+		prot |= PROT_READ;
+	if (vmai->cpt_flags&VM_WRITE)
+		prot |= PROT_WRITE;
+	if (vmai->cpt_flags&VM_EXEC)
+		prot |= PROT_EXEC;
+	if (vmai->cpt_flags&VM_GROWSDOWN)
+		prot |= PROT_GROWSDOWN;
+	if (vmai->cpt_flags&VM_GROWSUP)
+		prot |= PROT_GROWSUP;
+	return prot;
+}
+
+static unsigned long make_flags(struct cpt_vma_image *vmai)
+{
+	unsigned long flags = MAP_FIXED;
+
+	if (vmai->cpt_flags&(VM_SHARED|VM_MAYSHARE))
+		flags |= MAP_SHARED;
+	else
+		flags |= MAP_PRIVATE;
+
+	if (vmai->cpt_file == CPT_NULL)
+		flags |= MAP_ANONYMOUS;
+	if (vmai->cpt_flags&VM_GROWSDOWN)
+		flags |= MAP_GROWSDOWN;
+	if (vmai->cpt_flags&VM_DENYWRITE)
+		flags |= MAP_DENYWRITE;
+	if (vmai->cpt_flags&VM_EXECUTABLE)
+		flags |= MAP_EXECUTABLE;
+	if (!(vmai->cpt_flags&VM_ACCOUNT))
+		flags |= MAP_NORESERVE;
+	return flags;
+}
+
+
+#ifndef CONFIG_X86_64
+static int __alloc_ldt(mm_context_t *pc, int mincount)
+{
+	int oldsize, newsize, nr;
+
+	if (mincount <= pc->size)
+		return 0;
+	/*
+	 * LDT got larger - reallocate if necessary.
+	 */
+	oldsize = pc->size;
+	mincount = (mincount+511)&(~511);
+	newsize = mincount*LDT_ENTRY_SIZE;
+	for (nr = 0; nr * PAGE_SIZE < newsize; nr++) {
+		BUG_ON(nr * PAGE_SIZE >= 64*1024);
+		if (!pc->ldt_pages[nr]) {
+			pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER|__GFP_UBC);
+			if (!pc->ldt_pages[nr])
+				goto nomem;
+			clear_highpage(pc->ldt_pages[nr]);
+		}
+	}
+	pc->size = mincount;
+	return 0;
+
+nomem:
+	while (--nr >= 0)
+		__free_page(pc->ldt_pages[nr]);
+	pc->size = 0;
+	return -ENOMEM;
+}
+
+static int do_rst_ldt(struct cpt_obj_bits *li, loff_t pos, struct cpt_context *ctx)
+{
+	struct mm_struct *mm = current->mm;
+	int i;
+	int err;
+	int size;
+
+	err = __alloc_ldt(&mm->context, li->cpt_size/LDT_ENTRY_SIZE);
+	if (err)
+		return err;
+
+	size = mm->context.size*LDT_ENTRY_SIZE;
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		int nr = i / PAGE_SIZE, bytes;
+		char *kaddr = kmap(mm->context.ldt_pages[nr]);
+
+		bytes = size - i;
+		if (bytes > PAGE_SIZE)
+			bytes = PAGE_SIZE;
+		err = ctx->pread(kaddr, bytes, ctx, pos + li->cpt_hdrlen + i);
+		kunmap(mm->context.ldt_pages[nr]);
+		if (err)
+			return err;
+	}
+
+	load_LDT(&mm->context);
+	return 0;
+}
+
+#else
+
+static int do_rst_ldt(struct cpt_obj_bits *li, loff_t pos, struct cpt_context *ctx)
+{
+	struct mm_struct *mm = current->mm;
+	int oldsize = mm->context.size;
+	void *oldldt;
+	void *newldt;
+	int err;
+
+	if (li->cpt_size > PAGE_SIZE)
+		newldt = vmalloc(li->cpt_size);
+	else
+		newldt = kmalloc(li->cpt_size, GFP_KERNEL);
+
+	if (!newldt)
+		return -ENOMEM;
+
+	err = ctx->pread(newldt, li->cpt_size, ctx, pos + li->cpt_hdrlen);
+	if (err)
+		return err;
+
+	oldldt = mm->context.ldt;
+	mm->context.ldt = newldt;
+	mm->context.size = li->cpt_size/LDT_ENTRY_SIZE;
+
+	load_LDT(&mm->context);
+
+	if (oldsize) {
+		if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+			vfree(oldldt);
+		else
+			kfree(oldldt);
+	}
+	return 0;
+}
+#endif
+
+static int
+restore_aio_ring(struct kioctx *aio_ctx, struct cpt_aio_ctx_image *aimg)
+{
+	struct aio_ring_info *info = &aio_ctx->ring_info;
+	unsigned nr_events = aio_ctx->max_reqs;
+	unsigned long size;
+	int nr_pages;
+
+	/* We recalculate the parameters of the ring exactly like
+	 * fs/aio.c does and then compare the calculated values
+	 * with the ones stored in the dump. They must be the same. */
+
+	nr_events += 2;
+
+	size = sizeof(struct aio_ring);
+	size += sizeof(struct io_event) * nr_events;
+	nr_pages = (size + PAGE_SIZE-1) >> PAGE_SHIFT;
+
+	if (nr_pages != aimg->cpt_ring_pages)
+		return -EINVAL;
+
+	info->nr_pages = nr_pages;
+
+	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
+
+	if (nr_events != aimg->cpt_nr)
+		return -EINVAL;
+
+	info->nr = 0;
+	info->ring_pages = info->internal_pages;
+	if (nr_pages > AIO_RING_PAGES) {
+		info->ring_pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL);
+		if (!info->ring_pages)
+			return -ENOMEM;
+		memset(info->ring_pages, 0, sizeof(struct page *) * nr_pages);
+	}
+
+	info->mmap_size = nr_pages * PAGE_SIZE;
+
+	down_read(&current->mm->mmap_sem);
+	info->mmap_base = aimg->cpt_mmap_base;
+	info->nr_pages = get_user_pages(current, current->mm,
+					info->mmap_base, nr_pages, 
+					1, 0, info->ring_pages, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (unlikely(info->nr_pages != nr_pages)) {
+		int i;
+
+		for (i=0; i<info->nr_pages; i++)
+			put_page(info->ring_pages[i]);
+		if (info->ring_pages && info->ring_pages != info->internal_pages)
+			kfree(info->ring_pages);
+		return -EFAULT;
+	}
+
+	aio_ctx->user_id = info->mmap_base;
+
+	info->nr = nr_events;
+	info->tail = aimg->cpt_tail;
+
+	return 0;
+}
+
+static int do_rst_aio(struct cpt_aio_ctx_image *aimg, loff_t pos, cpt_context_t *ctx)
+{
+	int err;
+	struct kioctx *aio_ctx;
+
+	aio_ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL);
+	if (!aio_ctx)
+		return -ENOMEM;
+
+	memset(aio_ctx, 0, sizeof(*aio_ctx));
+	aio_ctx->max_reqs = aimg->cpt_max_reqs;
+
+	if ((err = restore_aio_ring(aio_ctx, aimg)) < 0) {
+		kmem_cache_free(kioctx_cachep, aio_ctx);
+		eprintk_ctx("AIO %Ld restore_aio_ring: %d\n", pos, err);
+		return err;
+	}
+
+	aio_ctx->mm = current->mm;
+	atomic_inc(&aio_ctx->mm->mm_count);
+	atomic_set(&aio_ctx->users, 1);
+	spin_lock_init(&aio_ctx->ctx_lock);
+	spin_lock_init(&aio_ctx->ring_info.ring_lock);
+	init_waitqueue_head(&aio_ctx->wait);
+	INIT_LIST_HEAD(&aio_ctx->active_reqs);
+	INIT_LIST_HEAD(&aio_ctx->run_list);
+	INIT_WORK(&aio_ctx->wq, aio_kick_handler, ctx);
+
+	atomic_add(aio_ctx->max_reqs, &aio_nr);
+
+	write_lock(&aio_ctx->mm->ioctx_list_lock);
+	aio_ctx->next = aio_ctx->mm->ioctx_list;
+	aio_ctx->mm->ioctx_list = aio_ctx;
+	write_unlock(&aio_ctx->mm->ioctx_list_lock);
+
+	return 0;
+}
+
+static int rst_pagein(struct vm_area_struct *vma, u32 idx,
+		      unsigned long addr, cpt_context_t * ctx)
+{
+	int err = -EFAULT;
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset(mm, addr);
+	if (unlikely(pgd_bad(*pgd)))
+		return -EINVAL;
+
+	pmd = pmd_alloc(mm, pgd, addr);
+	if (unlikely(!pmd))
+		return -ENOMEM;
+
+	pte = pte_alloc_map(mm, pmd, addr);
+	if (unlikely(!pte))
+		return -ENOMEM;
+
+	if (pte_none(*pte)) {
+		swp_entry_t entry;
+		swap_list_lock();
+		swap_device_lock(ctx->pgin_swp);
+		if (ctx->pgin_swp->swap_map[idx+1]++ == 0) {
+			ctx->pgin_swp->inuse_pages++;
+			nr_swap_pages--;
+		}
+#if CONFIG_USER_SWAP_ACCOUNTING
+		if (ctx->pgin_swp->owner_map[idx+1] == NULL) {
+			struct user_beancounter *ub = mm_ub(mm);
+			ub_swapentry_inc(ub);
+			ctx->pgin_swp->owner_map[idx+1] = get_beancounter(ub);
+		}
+#endif
+		swap_device_unlock(ctx->pgin_swp);
+		swap_list_unlock();
+		entry = swp_entry(ctx->lazytype, idx+1);
+		set_pte(pte, swp_entry_to_pte(entry));
+		err = 0;
+	} else {
+		eprintk_ctx("rst_pagein for populated pte: 0x%lx %u\n", addr, idx);
+	}
+	pte_unmap(pte);
+
+	return err;
+}
+
+struct anonvma_map
+{
+	struct hlist_node	list;
+	struct anon_vma		*avma;
+	__u64			id;
+};
+
+static int verify_create_anonvma(struct mm_struct *mm,
+				 struct cpt_vma_image *vmai,
+				 cpt_context_t *ctx)
+{
+	struct anon_vma *avma = NULL;
+	struct anon_vma *new_avma;
+	struct vm_area_struct *vma;
+	int h;
+
+	if (!ctx->anonvmas) {
+		if (CPT_ANONVMA_HSIZE*sizeof(struct hlist_head) > PAGE_SIZE)
+			return -EINVAL;
+		if ((ctx->anonvmas = (void*)__get_free_page(GFP_KERNEL)) == NULL)
+			return -ENOMEM;
+		for (h = 0; h < CPT_ANONVMA_HSIZE; h++)
+			INIT_HLIST_HEAD(&ctx->anonvmas[h]);
+	} else {
+		struct anonvma_map *map;
+		struct hlist_node *elem;
+
+		h = hash_long((unsigned long)vmai->cpt_anonvmaid, CPT_ANONVMA_HBITS);
+		hlist_for_each_entry(map, elem, &ctx->anonvmas[h], list) {
+			if (map->id == vmai->cpt_anonvmaid) {
+				avma = map->avma;
+				break;
+			}
+		}
+	}
+
+	down_read(&mm->mmap_sem);
+	if ((vma = find_vma(mm, vmai->cpt_start)) == NULL) {
+		up_read(&mm->mmap_sem);
+		return -ESRCH;
+	}
+	if (vma->vm_start != vmai->cpt_start) {
+		up_read(&mm->mmap_sem);
+		eprintk_ctx("vma start mismatch\n");
+		return -EINVAL;
+	}
+	if (vma->vm_pgoff != vmai->cpt_pgoff) { 
+		dprintk_ctx("vma pgoff mismatch, fixing\n");
+		if (vma->vm_file || (vma->vm_flags&(VM_SHARED|VM_MAYSHARE))) {
+			eprintk_ctx("cannot fixup vma pgoff\n");
+			up_read(&mm->mmap_sem);	
+			return -EINVAL;
+		}
+		vma->vm_pgoff = vmai->cpt_pgoff;
+	}
+
+	if (!vma->anon_vma) {
+		if (avma) {
+			vma->anon_vma = avma;
+			anon_vma_link(vma);
+		} else {
+			int err;
+
+			err = anon_vma_prepare(vma);
+
+			if (err) {
+				up_read(&mm->mmap_sem);
+				return err;
+			}
+		}
+	} else {
+		/* Note, we _can_ arrive at the situation when two
+		 * different anonvmaid's point to one anon_vma; this happens
+		 * e.g. when mmap() merged a new area into the previous one, so
+		 * they will share one anon_vma even if they did not on the
+		 * original host.
+		 *
+		 * IT IS OK. As far as I understand, we may merge all
+		 * the anon_vma's and rmap can scan all the huge list of vmas
+		 * searching for page. It is just "suboptimal".
+		 *
+		 * A real disaster would happen if the vma already got an
+		 * anon_vma with a different id. It is a very rare case: the
+		 * kernel makes its best effort to merge anon_vmas when some
+		 * attributes differ. In this case we fall back to copying memory.
+		 */
+		if (avma && vma->anon_vma != avma) {
+			up_read(&mm->mmap_sem);
+			wprintk_ctx("anon_vma mismatch\n");
+			return 0;
+		}
+	}
+
+	new_avma = vma->anon_vma;
+	up_read(&mm->mmap_sem);
+
+	if (!avma) {
+		struct anonvma_map *map;
+
+		if (!new_avma)
+			return -EINVAL;
+
+		if ((map = ub_kmalloc(sizeof(*map), GFP_KERNEL)) == NULL)
+			return -ENOMEM;
+
+		map->id = vmai->cpt_anonvmaid;
+		map->avma = new_avma;
+		h = hash_long((unsigned long)vmai->cpt_anonvmaid, CPT_ANONVMA_HBITS);
+		hlist_add_head(&map->list, &ctx->anonvmas[h]);
+	}
+	return 0;
+}
+
+static int copy_mm_pages(struct mm_struct *src, unsigned long start,
+			 unsigned long end)
+{
+	int err;
+
+	for (; start < end; start += PAGE_SIZE) {
+		struct page *page;
+		struct page *spage;
+		void *maddr, *srcaddr;
+
+		err = get_user_pages(current, current->mm,
+				     start, 1, 1, 1, &page, NULL);
+		if (err == 0)
+			err = -EFAULT;
+		if (err < 0)
+			return err;
+
+		err = get_user_pages(current, src,
+				     start, 1, 0, 1, &spage, NULL);
+
+		if (err == 0)
+			err = -EFAULT;
+		if (err < 0) {
+			page_cache_release(page);
+			return err;
+		}
+
+		srcaddr = kmap(spage);
+		maddr = kmap(page);
+		memcpy(maddr, srcaddr, PAGE_SIZE);
+		set_page_dirty_lock(page);
+		kunmap(page);
+		kunmap(spage);
+		page_cache_release(page);
+		page_cache_release(spage);
+	}
+	return 0;
+}
+
+static int do_rst_vma(struct cpt_vma_image *vmai, loff_t vmapos, loff_t mmpos, struct cpt_context *ctx)
+{
+	int err = 0;
+	unsigned long addr;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct file *file = NULL;
+	unsigned long prot;
+	int checked = 0;
+
+	if (vmai->cpt_type == CPT_VMA_VDSO) {
+		if (ctx->vdso == NULL) {
+			err = arch_setup_additional_pages(NULL, 0, vmai->cpt_start);
+			goto out;
+		}
+	}
+
+	prot = make_prot(vmai);
+
+	if (vmai->cpt_file != CPT_NULL) {
+		if (vmai->cpt_type == CPT_VMA_TYPE_0) {
+			file = rst_file(vmai->cpt_file, -1, ctx);
+			if (IS_ERR(file)) {
+				eprintk_ctx("do_rst_vma: rst_file: %Ld\n", vmai->cpt_file);
+				return PTR_ERR(file);
+			}
+		} else if (vmai->cpt_type == CPT_VMA_TYPE_SHM) {
+			file = rst_sysv_shm(vmai->cpt_file, ctx);
+			if (IS_ERR(file))
+				return PTR_ERR(file);
+		}
+	}
+
+	down_write(&mm->mmap_sem);
+	addr = do_mmap_pgoff(file, vmai->cpt_start,
+			     vmai->cpt_end-vmai->cpt_start,
+			     prot, make_flags(vmai),
+			     vmai->cpt_pgoff);
+
+	if (addr != vmai->cpt_start) {
+		up_write(&mm->mmap_sem);
+
+		err = -EINVAL;
+		if (IS_ERR((void*)addr))
+			err = addr;
+		goto out;
+	}
+
+	vma = find_vma(mm, vmai->cpt_start);
+	if (vma == NULL) {
+		up_write(&mm->mmap_sem);
+		eprintk_ctx("cannot find mmapped vma\n");
+		err = -ESRCH;
+		goto out;
+	}
+
+	/* do_mmap_pgoff() can merge the new area into the previous one (not the
+	 * next: we mmap in order, the rest of mm is still unmapped). This can
+	 * happen e.g. if flags are to be adjusted later, or if we had different
+	 * anon_vmas on two adjacent regions. Split it by brute force. */
+	if (vma->vm_start != vmai->cpt_start) {
+		dprintk_ctx("vma %Ld merged, split\n", vmapos);
+		err = split_vma(mm, vma, (unsigned long)vmai->cpt_start, 0);
+		if (err) {
+			up_write(&mm->mmap_sem);
+			eprintk_ctx("cannot split vma\n");
+			goto out;
+		}
+	}
+	up_write(&mm->mmap_sem);
+
+	if (vmai->cpt_anonvma && vmai->cpt_anonvmaid) {
+		err = verify_create_anonvma(mm, vmai, ctx);
+		if (err) {
+			eprintk_ctx("cannot verify_create_anonvma %Ld\n", vmapos);
+			goto out;
+		}
+	}
+
+	if (vmai->cpt_type == CPT_VMA_VDSO) {
+		struct page *page;
+		void *maddr;
+
+		err = get_user_pages(current, current->mm,
+				(unsigned long)vmai->cpt_start,
+				1, 1, 1, &page, NULL);
+		if (err == 0)
+			err = -EFAULT;
+		if (err < 0) {
+			eprintk_ctx("can't get vdso: get_user_pages: %d\n", err);
+			goto out;
+		}
+		err = 0;
+		maddr = kmap(page);
+		memcpy(maddr, ctx->vdso, PAGE_SIZE);
+		set_page_dirty_lock(page);
+		kunmap(page);
+		page_cache_release(page);
+		goto out;
+	}
+	if (vmai->cpt_next > vmai->cpt_hdrlen) {
+		loff_t offset = vmapos + vmai->cpt_hdrlen;
+
+		do {
+			union {
+				struct cpt_page_block pb;
+				struct cpt_remappage_block rpb;
+				struct cpt_copypage_block cpb;
+				struct cpt_lazypage_block lpb;
+				struct cpt_iterpage_block ipb;
+			} u;
+			loff_t pos;
+
+			err = rst_get_object(-1, offset, &u, ctx);
+			if (err) {
+				eprintk_ctx("vma fix object: %d\n", err);
+				goto out;
+			}
+			if (u.rpb.cpt_object == CPT_OBJ_REMAPPAGES) {
+				err = sc_remap_file_pages(u.rpb.cpt_start,
+							  u.rpb.cpt_end-u.rpb.cpt_start,
+							  0, u.rpb.cpt_pgoff, 0);
+				if (err < 0) {
+					eprintk_ctx("remap_file_pages: %d (%08x,%u,%u)\n", err,
+					       (__u32)u.rpb.cpt_start, (__u32)(u.rpb.cpt_end-u.rpb.cpt_start), 
+					       (__u32)u.rpb.cpt_pgoff);
+					goto out;
+				}
+				offset += u.rpb.cpt_next;
+				continue;
+			} else if (u.cpb.cpt_object == CPT_OBJ_LAZYPAGES) {
+				addr = u.lpb.cpt_start;
+
+				down_read(&mm->mmap_sem);
+				if ((vma = find_vma(mm, u.lpb.cpt_start)) == NULL) {
+					up_read(&mm->mmap_sem);
+					eprintk_ctx("lost vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+				err = anon_vma_prepare(vma);
+				if (err) {
+					up_read(&mm->mmap_sem);
+					goto out;
+				}
+				spin_lock(&mm->page_table_lock);
+				while (addr < u.lpb.cpt_end) {
+					err = rst_pagein(vma, u.lpb.cpt_index + (addr-u.lpb.cpt_start)/PAGE_SIZE,
+							 addr, ctx);
+					if (err)
+						break;
+					addr += PAGE_SIZE;
+				}
+				spin_unlock(&mm->page_table_lock);
+				up_read(&mm->mmap_sem);
+				if (err)
+					goto out;
+				offset += u.cpb.cpt_next;
+				continue;
+			} else if (u.cpb.cpt_object == CPT_OBJ_COPYPAGES) {
+				struct vm_area_struct *vma, *vma1;
+				struct mm_struct *src;
+				struct anon_vma *src_anon;
+				cpt_object_t *mobj;
+
+				if (!vmai->cpt_anonvmaid) {
+					err = -EINVAL;
+					eprintk_ctx("CPT_OBJ_COPYPAGES in !anonvma\n");
+					goto out;
+				}
+
+				mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, u.cpb.cpt_source, ctx);
+				if (!mobj) {
+					eprintk_ctx("lost mm_struct to clone pages from\n");
+					err = -ESRCH;
+					goto out;
+				}
+				src = mobj->o_obj;
+
+				down_read(&src->mmap_sem);
+				src_anon = NULL;
+				vma1 = find_vma(src, u.cpb.cpt_start);
+				if (vma1)
+					src_anon = vma1->anon_vma;
+				up_read(&src->mmap_sem);
+
+				if (!vma1) {
+					eprintk_ctx("lost src vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+
+				down_read(&mm->mmap_sem);
+				if ((vma = find_vma(mm, u.cpb.cpt_start)) == NULL) {
+					up_read(&mm->mmap_sem);
+					eprintk_ctx("lost vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+
+				if (!src_anon ||
+				    !vma->anon_vma ||
+				    vma->anon_vma != src_anon ||
+				    vma->vm_start - vma1->vm_start !=
+				    (vma->vm_pgoff - vma1->vm_pgoff) << PAGE_SHIFT) {
+					up_read(&mm->mmap_sem);
+					wprintk_ctx("anon_vma mismatch in vm_area_struct %Ld\n", vmapos);
+					err = copy_mm_pages(mobj->o_obj,
+							    u.cpb.cpt_start,
+							    u.cpb.cpt_end);
+				} else {
+					spin_lock(&mm->page_table_lock);
+					err = __copy_page_range(vma, mobj->o_obj,
+								u.cpb.cpt_start,
+								u.cpb.cpt_end-u.cpb.cpt_start);
+					spin_unlock(&mm->page_table_lock);
+					up_read(&mm->mmap_sem);
+				}
+				if (err) {
+					eprintk_ctx("clone_page_range: %d (%08x,%u,%ld)\n", err,
+						(__u32)u.cpb.cpt_start, (__u32)(u.cpb.cpt_end-u.cpb.cpt_start), 
+						(long)u.cpb.cpt_source);
+					goto out;
+				}
+
+				offset += u.cpb.cpt_next;
+				continue;
+			} else if (u.pb.cpt_object == CPT_OBJ_ITERPAGES ||
+				   u.pb.cpt_object == CPT_OBJ_ITERYOUNGPAGES
+				   ) {
+				u64 page_pos[16];
+				pos = offset + sizeof(u.pb);
+
+				addr = u.lpb.cpt_start;
+
+				err = ctx->pread(&page_pos,
+						 8*(u.lpb.cpt_end-addr)/PAGE_SIZE,
+						 ctx,
+						 pos);
+				if (err) {
+					eprintk_ctx("Oops\n");
+					goto out;
+				}
+
+				down_read(&mm->mmap_sem);
+				if ((vma = find_vma(mm, u.lpb.cpt_start)) == NULL) {
+					eprintk_ctx("lost vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+				err = anon_vma_prepare(vma);
+				if (err) {
+					up_read(&mm->mmap_sem);
+					goto out;
+				}
+				spin_lock(&mm->page_table_lock);
+				while (addr < u.lpb.cpt_end) {
+					err = rst_iter(vma,
+						       page_pos[(addr-u.lpb.cpt_start)/PAGE_SIZE],
+						       addr,
+						       ctx);
+					if (err)
+						break;
+					addr += PAGE_SIZE;
+				}
+				spin_unlock(&mm->page_table_lock);
+				if (u.pb.cpt_object == CPT_OBJ_ITERYOUNGPAGES) {
+					make_pages_present((unsigned long)u.lpb.cpt_start,
+							   (unsigned long)u.lpb.cpt_end);
+				}
+				up_read(&mm->mmap_sem);
+				if (err)
+					goto out;
+				offset += u.cpb.cpt_next;
+				continue;
+			}
+			if (u.pb.cpt_object != CPT_OBJ_PAGES) {
+				eprintk_ctx("unknown vma fix object %d\n", u.pb.cpt_object);
+				err = -EINVAL;
+				goto out;
+			}
+			pos = offset + sizeof(u.pb);
+			if (!(vmai->cpt_flags&VM_ACCOUNT) && !(prot&PROT_WRITE)) {
+				/* I guess get_user_pages() has messed things up;
+				 * this happens e.g. when gdb inserts breakpoints.
+				 */
+				int i;
+				for (i=0; i<(u.pb.cpt_end-u.pb.cpt_start)/PAGE_SIZE; i++) {
+					struct page *page;
+					void *maddr;
+					err = get_user_pages(current, current->mm,
+							     (unsigned long)u.pb.cpt_start + i*PAGE_SIZE,
+							     1, 1, 1, &page, NULL);
+					if (err == 0)
+						err = -EFAULT;
+					if (err < 0) {
+						eprintk_ctx("get_user_pages: %d\n", err);
+						goto out;
+					}
+					err = 0;
+					maddr = kmap(page);
+					if (u.pb.cpt_content == CPT_CONTENT_VOID) {
+						memset(maddr, 0, PAGE_SIZE);
+					} else if (u.pb.cpt_content == CPT_CONTENT_DATA) {
+						err = ctx->pread(maddr, PAGE_SIZE,
+								 ctx, pos + i*PAGE_SIZE);
+						if (err) {
+							kunmap(page);
+							goto out;
+						}
+					} else {
+						err = -EINVAL;
+						kunmap(page);
+						goto out;
+					}
+					set_page_dirty_lock(page);
+					kunmap(page);
+					page_cache_release(page);
+				}
+			} else {
+				if (!(prot&PROT_WRITE))
+					sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot | PROT_WRITE);
+				if (u.pb.cpt_content == CPT_CONTENT_VOID) {
+					int i;
+					for (i=0; i<(u.pb.cpt_end-u.pb.cpt_start)/sizeof(unsigned long); i++) {
+						err = __put_user(0UL, ((unsigned long __user*)(unsigned long)u.pb.cpt_start) + i);
+						if (err) {
+							eprintk_ctx("__put_user 2 %d\n", err);
+							goto out;
+						}
+					}
+				} else if (u.pb.cpt_content == CPT_CONTENT_DATA) {
+					loff_t tpos = pos;
+					err = ctx->file->f_op->read(ctx->file, cpt_ptr_import(u.pb.cpt_start),
+							 u.pb.cpt_end-u.pb.cpt_start,
+							 &tpos);
+					if (err != u.pb.cpt_end-u.pb.cpt_start) {
+						if (err >= 0)
+							err = -EIO;
+						goto out;
+					}
+				} else {
+					err = -EINVAL;
+					goto out;
+				}
+				if (!(prot&PROT_WRITE))
+					sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot);
+			}
+			err = 0;
+			offset += u.pb.cpt_next;
+		} while (offset < vmapos + vmai->cpt_next);
+	}
+
+check:
+	do {
+		struct vm_area_struct *vma;
+		down_read(&mm->mmap_sem);
+		vma = find_vma(mm, addr);
+		if (vma) {
+			if ((vma->vm_flags^vmai->cpt_flags)&VM_READHINTMASK) {
+				VM_ClearReadHint(vma);
+				vma->vm_flags |= vmai->cpt_flags&VM_READHINTMASK;
+			}
+			if ((vma->vm_flags^vmai->cpt_flags)&VM_LOCKED) {
+				dprintk_ctx("fixing up VM_LOCKED %Ld\n", vmapos);
+				up_read(&mm->mmap_sem);
+				if (vma->vm_flags&VM_LOCKED)
+					err = sc_munlock(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start);
+				else
+					err = sc_mlock(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start);
+				/* When mlock fails with EFAULT, it means
+				 * that it could not bring in pages.
+				 * It can happen after mlock() on unreadable
+				 * VMAs. But VMA is correctly locked,
+				 * so that this error can be ignored. */
+				if (err == -EFAULT)
+					err = 0;
+				if (err)
+					goto out;
+				goto check;
+			}
+			if ((vma->vm_page_prot.pgprot^vmai->cpt_pgprot)&~__PAGE_NX)
+				wprintk_ctx("VMA %08lx@%ld pgprot mismatch %08Lx %08Lx\n", addr, (long)vmapos,
+				       (__u64)vma->vm_page_prot.pgprot, (__u64)vmai->cpt_pgprot);
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
+			if (((vma->vm_page_prot.pgprot^vmai->cpt_pgprot)&__PAGE_NX) &&
+			    (ctx->kernel_config_flags&CPT_KERNEL_CONFIG_PAE))
+				wprintk_ctx("VMA %08lx@%ld pgprot mismatch %08Lx %08Lx\n", addr, (long)vmapos,
+				       (__u64)vma->vm_page_prot.pgprot, (__u64)vmai->cpt_pgprot);
+#endif
+			if (vma->vm_flags != vmai->cpt_flags) {
+				unsigned long x = vma->vm_flags ^ vmai->cpt_flags;
+				if (x & VM_EXEC) {
+					/* Crap. On i386 this is OK.
+					 * It is impossible to make via mmap/mprotect
+					 * exec.c clears VM_EXEC on stack. */
+					vma->vm_flags &= ~VM_EXEC;
+				} else if ((x & VM_ACCOUNT) && !checked) {
+					checked = 1;
+					if (!(prot&PROT_WRITE)) {
+						up_read(&mm->mmap_sem);
+						sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot | PROT_WRITE);
+						sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot);
+						goto check;
+					}
+					wprintk_ctx("VMA %08lx@%ld flag mismatch %08x %08x\n", addr, (long)vmapos,
+					       (__u32)vma->vm_flags, (__u32)vmai->cpt_flags);
+				} else {
+					wprintk_ctx("VMA %08lx@%ld flag mismatch %08x %08x\n", addr, (long)vmapos,
+					       (__u32)vma->vm_flags, (__u32)vmai->cpt_flags);
+				}
+			}
+		} else {
+			wprintk_ctx("no VMA for %08lx@%ld\n", addr, (long)vmapos);
+		}
+		up_read(&mm->mmap_sem);
+	} while (0);
+
+out:
+	if (file)
+		fput(file);
+	return err;
+}
+
+/* Restore the address space of the current task from the CPT_OBJ_MM
+ * image @vmi found at file offset @pos.  The existing mappings are torn
+ * down, the MM beancounter is fixed up, mm_struct layout fields are
+ * copied from the image, and every nested object (VMA, LDT context,
+ * AIO context) is re-created.  Returns 0 or a negative errno. */
+static int do_rst_mm(struct cpt_mm_image *vmi, loff_t pos, struct cpt_context *ctx)
+{
+	int err = 0;
+	unsigned int def_flags;
+	struct mm_struct *mm = current->mm;
+	struct user_beancounter *bc;
+
+	down_write(&mm->mmap_sem);
+	/* Drop everything currently mapped: the image fully describes
+	 * the address space to be rebuilt. */
+	do_munmap(mm, 0, TASK_SIZE);
+
+	/*
+	 * MM beancounter is usually correct from the fork time,
+	 * but not for init, for example.
+	 * Luckily, mm_ub can be changed for a completely empty MM.
+	 */
+	bc = rst_lookup_ubc(vmi->cpt_mmub, ctx);
+	err = virtinfo_notifier_call(VITYPE_SCP, VIRTINFO_SCP_RSTMM, bc);
+	if (err & NOTIFY_FAIL) {
+		/* The SCP notifier vetoed the restore. */
+		up_write(&mm->mmap_sem);
+		return -ECHRNG;
+	}
+	/* On request from the notifier, install the beancounter looked up
+	 * from the image; the previous one is released just below. */
+	if ((err & VIRTNOTIFY_CHANGE) && bc != mm_ub(mm)) {
+		struct user_beancounter *old_bc = mm_ub(mm);
+		mm_ub(mm) = bc;
+		bc = old_bc;
+	}
+	err = 0;
+	put_beancounter(bc);
+
+	/* Copy code/data/brk/stack/arg/env layout straight from the image. */
+	mm->start_code = vmi->cpt_start_code;
+	mm->end_code = vmi->cpt_end_code;
+	mm->start_data = vmi->cpt_start_data;
+	mm->end_data = vmi->cpt_end_data;
+	mm->start_brk = vmi->cpt_start_brk;
+	mm->brk = vmi->cpt_brk;
+	mm->start_stack = vmi->cpt_start_stack;
+	mm->arg_start = vmi->cpt_start_arg;
+	mm->arg_end = vmi->cpt_end_arg;
+	mm->env_start = vmi->cpt_start_env;
+	mm->env_end = vmi->cpt_end_env;
+	/* The saved def_flags is applied only at the very end of this
+	 * function, after all VMAs have been restored. */
+	mm->def_flags = 0;
+	def_flags = vmi->cpt_def_flags;
+
+	mm->dumpable = vmi->cpt_dumpable;
+	mm->vps_dumpable = vmi->cpt_vps_dumpable;
+	if (ctx->image_version >= CPT_VERSION_9) {
+		/* Newer images carry the vdso address explicitly. */
+		mm->context.vdso = cpt_ptr_import(vmi->cpt_vdso);
+		current_thread_info()->sysenter_return = SYSEXIT_RETURN;
+	}
+
+#if 0 /* def CONFIG_HUGETLB_PAGE*/
+/* NB: ? */
+	int used_hugetlb;
+#endif
+	up_write(&mm->mmap_sem);
+
+	/* Walk all objects nested inside the MM image, if any. */
+	if (vmi->cpt_next > vmi->cpt_hdrlen) {
+		loff_t offset = pos + vmi->cpt_hdrlen;
+		do {
+			union {
+				struct cpt_vma_image vmai;
+				struct cpt_aio_ctx_image aioi;
+				struct cpt_obj_bits bits;
+			} u;
+			err = rst_get_object(-1, offset, &u, ctx);
+			if (err)
+				goto out;
+			if (u.vmai.cpt_object == CPT_OBJ_VMA) {
+				err = do_rst_vma(&u.vmai, offset, pos, ctx);
+				if (err)
+					goto out;
+			} else if (u.bits.cpt_object == CPT_OBJ_BITS &&
+				   u.bits.cpt_content == CPT_CONTENT_MM_CONTEXT) {
+				err = do_rst_ldt(&u.bits, offset, ctx);
+				if (err)
+					goto out;
+			} else if (u.aioi.cpt_object == CPT_OBJ_AIO_CONTEXT) {
+				err = do_rst_aio(&u.aioi, offset, ctx);
+				if (err)
+					goto out;
+			} else {
+				eprintk_ctx("unknown object %u in mm image\n", u.vmai.cpt_object);
+				err = -EINVAL;
+				goto out;
+			}
+			offset += u.vmai.cpt_next;
+		} while (offset < pos + vmi->cpt_next);
+	}
+
+	/* Now that all mappings exist, apply the saved default flags. */
+	down_write(&mm->mmap_sem);
+	mm->def_flags = def_flags;
+	up_write(&mm->mmap_sem);
+
+
+out:
+	return err;
+}
+
+/* Complete the MM restore for task image @ti, running in the context of
+ * the restored task itself.  Either drops the mm (the task had none),
+ * reuses an already-restored shared mm, or builds a fresh one from the
+ * image and registers it in the object table for later sharers.
+ * Returns 0 or a negative errno. */
+int rst_mm_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	int err = 0;
+	cpt_object_t *mobj;
+	/* Scratch page to hold the cpt_mm_image read from the dump file. */
+	void *tmp = (void*)__get_free_page(GFP_KERNEL);
+	struct cpt_mm_image *vmi = (struct cpt_mm_image *)tmp;
+
+	if (!tmp)
+		return -ENOMEM;
+
+	if (ti->cpt_mm == CPT_NULL) {
+		/* The checkpointed task had no mm: drop whatever mm the
+		 * restoring task currently carries. */
+		if (current->mm) {
+			virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_EXIT,
+					current);
+			exit_mm(current);
+		}
+		goto out;
+	}
+
+	/* A shared mm may have been restored already by a sibling; clone()
+	 * must then have given us exactly that mm. */
+	mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx);
+	if (mobj) {
+		if (current->mm != mobj->o_obj) BUG();
+		goto out;
+	}
+
+	if (current->mm == NULL) {
+		struct mm_struct *mm = mm_alloc();
+		if (mm == NULL) {
+			err = -ENOMEM;
+			goto out;
+		}
+		err = init_new_context(current, mm);
+		if (err) {
+			mmdrop(mm);
+			goto out;
+		}
+		current->mm = mm;
+	}
+
+	if ((err = rst_get_object(CPT_OBJ_MM, ti->cpt_mm, vmi, ctx)) != 0)
+		goto out;
+	if ((err = do_rst_mm(vmi, ti->cpt_mm, ctx)) != 0) {
+		eprintk_ctx("do_rst_mm %Ld\n", ti->cpt_mm);
+		goto out;
+	}
+	/* Register the restored mm so tasks sharing it can find it. */
+	err = -ENOMEM;
+	mobj = cpt_object_add(CPT_OBJ_MM, current->mm, ctx);
+	if (mobj != NULL) {
+		err = 0;
+		cpt_obj_setpos(mobj, ti->cpt_mm, ctx);
+	}
+
+out:
+	if (tmp)
+		free_page((unsigned long)tmp);
+	return err;
+}
+
+/* This is part of mm setup, made in parent context. Mostly, it is the place,
+ * where we graft mm of another process to child.
+ */
+
+int rst_mm_basic(cpt_object_t *obj, struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	task_t *tsk = obj->o_obj;
+	cpt_object_t *mobj;
+
+	/* Task without mm. Just get rid of this. */
+	if (ti->cpt_mm == CPT_NULL) {
+		if (tsk->mm) {
+			virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_EXIT,
+					tsk);
+			mmput(tsk->mm);
+			tsk->mm = NULL;
+		}
+		return 0;
+	}
+
+	mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx);
+	if (mobj) {
+		struct mm_struct *newmm = mobj->o_obj;
+		/* Good, the MM is already created. */
+		if (newmm == tsk->mm) {
+			/* Already done by clone(). */
+			return 0;
+		}
+		/* Replace the child's private mm with the shared one.
+		 * NOTE(review): assumes tsk->mm is non-NULL here (child was
+		 * cloned with an mm) -- confirm against the clone path. */
+		mmput(tsk->mm);
+		atomic_inc(&newmm->mm_users);
+		tsk->mm = newmm;
+		tsk->active_mm = newmm;
+	}
+	return 0;
+}
+
+/* We use CLONE_VM when mm of child is going to be shared with parent.
+ * Otherwise mm is copied.
+ */
+
+/* Decide clone flags for a task being re-created: CLONE_VM when the task
+ * image has no mm, or its mm has already been restored (i.e. shared). */
+__u32 rst_mm_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	if (ti->cpt_mm == CPT_NULL ||
+	    lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx))
+		return CLONE_VM;
+	return 0;
+}
+
+/* Start asynchronous read-ahead of up to 16 contiguous swap pages
+ * beginning at @entry, stopping early at the device end or at a
+ * free/bad slot.  Speeds up the sequential fault-in done by the
+ * swapoff_* walkers below. */
+static void unuse_ahead(swp_entry_t entry)
+{
+	int i, num;
+	struct page *new_page;
+	unsigned long offset, toff;
+	struct swap_info_struct *swapdev = swp_type(entry) + swap_info;
+
+	i = 16;		/* read-ahead window, in pages */
+	num = 0;
+
+	toff = swp_offset(entry);
+	BUG_ON(!toff);
+	offset = toff;
+
+	/* Count how many consecutive slots are safe to read. */
+	swap_device_lock(swapdev);
+	do {
+		/* Don't read-ahead past the end of the swap area */
+		if (toff >= swapdev->max)
+			break;
+		/* Don't read in free or bad pages */
+		if (!swapdev->swap_map[toff])
+			break;
+		if (swapdev->swap_map[toff] == SWAP_MAP_BAD)
+			break;
+		toff++;
+		num++;
+	} while (--i);
+	swap_device_unlock(swapdev);
+
+	for (i = 0; i < num; offset++, i++) {
+		/* Ok, do the async read-ahead now */
+		new_page = read_swap_cache_async(swp_entry(swp_type(entry),
+							   offset), NULL, 0);
+		if (!new_page)
+			break;
+		page_cache_release(new_page);
+	}
+	/* Is this necessary? */
+	lru_add_drain();	/* Push any new pages onto the LRU now */
+}
+
+/* Walk the PTEs under @dir for [address, address+size) and fault in
+ * every page that is swapped out to the lazy swap device
+ * (ctx->lazytype).  Called with vma->vm_mm->page_table_lock held; the
+ * lock is dropped around the read-ahead and the fault, then re-taken.
+ * Returns 0, -EFAULT or -ENOMEM. */
+static int swapoff_pmd(struct vm_area_struct * vma, pmd_t *dir,
+	unsigned long address, unsigned long size, unsigned long offset,
+	cpt_context_t *ctx)
+{
+	int err;
+	pte_t * pte;
+	unsigned long end;
+
+	if (pmd_none(*dir))
+		return 0;
+	if (pmd_bad(*dir)) {
+		pmd_ERROR(*dir);
+		pmd_clear(dir);
+		return 0;
+	}
+	pte = pte_offset_map(dir, address);
+	/* From here on @address is relative to this PMD; @offset holds
+	 * the PMD's base virtual address. */
+	offset += address & PMD_MASK;
+	address &= ~PMD_MASK;
+	end = address + size;
+	if (end > PMD_SIZE)
+		end = PMD_SIZE;
+	do {
+		swp_entry_t swpent;
+		if (pte_present(*pte) || pte_none(*pte))
+			goto next;
+		swpent = pte_to_swp_entry(*pte);
+		if (swp_type(swpent) != ctx->lazytype)
+			goto next;
+		/* Drop the mapping and the lock: the fault may sleep. */
+		pte_unmap(pte);
+		spin_unlock(&vma->vm_mm->page_table_lock);
+
+		unuse_ahead(swpent);
+
+		err = handle_mm_fault(vma->vm_mm, vma, offset + address, 0);
+
+		spin_lock(&vma->vm_mm->page_table_lock);
+
+		if (err == VM_FAULT_SIGBUS)
+			return -EFAULT;
+		if (err == VM_FAULT_OOM)
+			return -ENOMEM;
+
+		/* Re-map the PTE; it may have changed while unlocked. */
+		pte = pte_offset_map(dir, offset + address);
+next:
+		address += PAGE_SIZE;
+		pte++;
+	} while (address && address < end);
+	pte_unmap(pte - 1);
+	return 0;
+}
+
+/* Walk the PMDs under @dir for [address, address+size), delegating to
+ * swapoff_pmd() to fault in lazily swapped pages.  Returns 0 or the
+ * first error from swapoff_pmd(). */
+static int swapoff_pgd(struct vm_area_struct * vma, pgd_t *dir,
+	unsigned long address, unsigned long size, cpt_context_t *ctx)
+{
+	pmd_t * pmd;
+	unsigned long offset, end;
+
+	if (pgd_none(*dir))
+		return 0;
+	if (pgd_bad(*dir)) {
+		pgd_ERROR(*dir);
+		pgd_clear(dir);
+		return 0;
+	}
+	pmd = pmd_offset(dir, address);
+	/* @offset = base virtual address of this PGD entry; @address
+	 * becomes relative to it. */
+	offset = address & PGDIR_MASK;
+	address &= ~PGDIR_MASK;
+	end = address + size;
+	if (end > PGDIR_SIZE)
+		end = PGDIR_SIZE;
+	if (address >= end)
+		BUG();
+	do {
+		int err;
+
+		err = swapoff_pmd(vma, pmd, address, end - address,
+				  offset, ctx);
+		if (err)
+			return err;
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address && address < end);
+	return 0;
+}
+
+/* Fault in all lazily swapped pages of @vma by walking its page tables
+ * PGD by PGD.  @p is not used here (passed along by the caller).
+ * Returns 0 or the first error. */
+static int swapoff_vma(struct vm_area_struct *vma, task_t *p,
+		       cpt_context_t *ctx)
+{
+	pgd_t * pgd = pgd_offset(vma->vm_mm, vma->vm_start);
+	unsigned long start = vma->vm_start, end = vma->vm_end;
+
+	do {
+		int err;
+		err = swapoff_pgd(vma, pgd, start, end - start, ctx);
+		if (err)
+			return err;
+		start = (start + PGDIR_SIZE) & PGDIR_MASK;
+		pgd++;
+	} while (start && start < end);
+	return 0;
+}
+
+/* Scan VE task list and scan not-trivial mm to fetch lazy pages.
+ * We do not guarantee, that all tasks will be swept, we do not guarantee
+ * all tasks will be swept only once. Just do the best efforts. */
+
+int rst_swapoff(cpt_context_t *ctx)
+{
+	int err = 0;
+	task_t *p, *put_it;
+	struct ve_struct *env;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (env == NULL)
+		return 0;
+
+	/* Reference to the previously visited task; dropped only after
+	 * tasklist_lock has been released again. */
+	put_it = NULL;
+
+	read_lock(&tasklist_lock);
+restart:
+	err = 0;
+	if (list_empty(&env->vetask_lh))
+		goto out;
+	p = __first_task_ve(env);
+	for (; p != NULL; p = __next_task_ve(env, p)) {
+		struct mm_struct *mm;
+		struct vm_area_struct *vma;
+
+		if (!thread_group_leader(p))
+			continue;
+
+		/* Pin the mm so it cannot go away once we drop the lock. */
+		task_lock(p);
+		mm = p->mm;
+		if (mm == NULL) {
+			task_unlock(p);
+			continue;
+		}
+		atomic_inc(&mm->mm_users);
+		task_unlock(p);
+
+		get_task_struct(p);
+		read_unlock(&tasklist_lock);
+
+		if (put_it)
+			put_task_struct(put_it);
+		put_it = NULL;
+
+		/* Sweep writable private VMAs, faulting in lazy pages. */
+		down_read(&mm->mmap_sem);
+		spin_lock(&mm->page_table_lock);
+		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			if (is_vm_hugetlb_page(vma))
+				continue;
+			if (!(vma->vm_flags & VM_WRITE))
+				continue;
+			if (vma->vm_flags & VM_SHARED)
+				continue;
+			err = swapoff_vma(vma, p, ctx);
+			if (err)
+				break;
+		}
+		spin_unlock(&mm->page_table_lock);
+		up_read(&mm->mmap_sem);
+
+		mmput(mm);
+
+		/* We are not in hurry at all. */
+		yield();
+
+		read_lock(&tasklist_lock);
+		put_it = p;
+		if (err)
+			break;
+		/* If the task exited while we were unlocked, its list
+		 * linkage is unreliable: rescan from the beginning. */
+		if (p->exit_state)
+			goto restart;
+	}
+out:
+	read_unlock(&tasklist_lock);
+
+	if (put_it)
+		put_task_struct(put_it);
+
+	put_ve(env);
+	return err;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_net.c linux-2.6.9-ve023stab054/kernel/cpt/rst_net.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_net.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_net.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,429 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/ve.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+#include "cpt_net.h"
+
+#include "cpt_syscalls.h"
+
+extern struct in_ifaddr *inet_alloc_ifa(void);
+extern int inet_insert_ifa(struct in_ifaddr *ifa);
+
+/* Restore interface addresses from the CPT_SECT_NET_IFADDR section.
+ * Only AF_INET addresses are supported; any other family fails the
+ * restore with -EINVAL.  Returns 0 or a negative errno. */
+int rst_restore_ifaddr(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_NET_IFADDR];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_ifaddr_image di;
+	struct net_device *dev;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_IFADDR || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	/* Iterate over all cpt_ifaddr_image records of the section. */
+	while (sec < endsec) {
+		int cindex = -1;
+		int err;	/* NOTE(review): shadows the function-level err */
+		err = rst_get_object(CPT_OBJ_NET_IFADDR, sec, &di, ctx);
+		if (err)
+			return err;
+		cindex = di.cpt_index;
+		rtnl_lock();
+		dev = __dev_get_by_index(cindex);
+		if (dev && di.cpt_family == AF_INET) {
+			struct in_device *in_dev;
+			struct in_ifaddr *ifa;
+			/* Make sure the device has an inet configuration. */
+			if ((in_dev = __in_dev_get(dev)) == NULL)
+				in_dev = inetdev_init(dev);
+			ifa = inet_alloc_ifa();
+			if (ifa) {
+				/* Populate the address from the image and
+				 * insert it through the normal inet path. */
+				ifa->ifa_local = di.cpt_address[0];
+				ifa->ifa_address = di.cpt_peer[0];
+				ifa->ifa_broadcast = di.cpt_broadcast[0];
+				ifa->ifa_prefixlen = di.cpt_masklen;
+				ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
+				ifa->ifa_flags = di.cpt_flags;
+				ifa->ifa_scope = di.cpt_scope;
+				memcpy(ifa->ifa_label, di.cpt_label, IFNAMSIZ);
+				in_dev_hold(in_dev);
+				ifa->ifa_dev   = in_dev;
+				err = inet_insert_ifa(ifa);
+				if (err)
+					eprintk_ctx("add ifaddr err %d for %d %s\n", err, di.cpt_index, di.cpt_label);
+			}
+		} else {
+			eprintk_ctx("unknown ifaddr 2 for %d\n", di.cpt_index);
+			err = -EINVAL;
+		}
+		rtnl_unlock();
+		if (err)
+			return err;
+		sec += di.cpt_next;
+	}
+	return 0;
+}
+
+/* Inspect a routing netlink message before replaying it.  The RTA walk
+ * validates attribute framing but rewrites nothing -- presumably a
+ * leftover hook for attribute fixups; confirm before removing.
+ * Returns nonzero when the route was originally installed by the kernel
+ * (RTPROT_KERNEL), so the caller can ignore an -EEXIST reply for it. */
+static int rewrite_rtmsg(struct nlmsghdr *nlh, struct cpt_context *ctx)
+{
+	int min_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+	struct rtmsg *rtm = NLMSG_DATA(nlh);
+
+	if (nlh->nlmsg_len > min_len) {
+		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+		struct rtattr *rta = (void*)nlh + NLMSG_ALIGN(min_len);
+
+		while (RTA_OK(rta, attrlen)) {
+			rta = RTA_NEXT(rta, attrlen);
+		}
+	}
+	return rtm->rtm_protocol == RTPROT_KERNEL;
+}
+
+/* Replay the routing table from the CPT_SECT_NET_ROUTE section: every
+ * stored rtnetlink message is sent through an in-kernel netlink socket
+ * and the acknowledgement is checked.  -EEXIST is tolerated for routes
+ * originally installed by the kernel.  Returns 0 or a negative errno. */
+int rst_restore_route(struct cpt_context *ctx)
+{
+	int err;
+	struct socket *sock;
+	struct msghdr msg;
+	struct iovec iov;
+	struct sockaddr_nl nladdr;
+	mm_segment_t oldfs;
+	loff_t sec = ctx->sections[CPT_SECT_NET_ROUTE];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_object_hdr v;
+	char *pg;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_ROUTE || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	/* Empty section: nothing to replay. */
+	if (h.cpt_hdrlen >= h.cpt_next)
+		return 0;
+
+	sec += h.cpt_hdrlen;
+	err = rst_get_object(CPT_OBJ_NET_ROUTE, sec, &v, ctx);
+	if (err < 0)
+		return err;
+
+	/* In-kernel netlink socket used to inject the saved messages. */
+	err = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE, &sock);
+	if (err)
+		return err;
+
+	/* One page serves both as the send buffer and the reply buffer. */
+	pg = (char*)__get_free_page(GFP_KERNEL);
+	if (pg == NULL) {
+		err = -ENOMEM;
+		goto out_sock;
+	}
+
+	memset(&nladdr, 0, sizeof(nladdr));
+	nladdr.nl_family = AF_NETLINK;
+
+	endsec = sec + v.cpt_next;
+	sec += v.cpt_hdrlen;
+
+	while (sec < endsec) {
+		struct nlmsghdr *n;
+		struct nlmsghdr nh;
+		int kernel_flag;
+
+		if (endsec - sec < sizeof(nh))
+			break;
+
+		/* Read the header first to learn the message length,
+		 * then the whole message. */
+		err = ctx->pread(&nh, sizeof(nh), ctx, sec);
+		if (err)
+			goto out_sock_pg;
+		if (nh.nlmsg_len < sizeof(nh) || nh.nlmsg_len > PAGE_SIZE ||
+		    endsec - sec < nh.nlmsg_len) {
+			err = -EINVAL;
+			goto out_sock_pg;
+		}
+		err = ctx->pread(pg, nh.nlmsg_len, ctx, sec);
+		if (err)
+			goto out_sock_pg;
+
+		n = (struct nlmsghdr*)pg;
+		/* Force create semantics regardless of the saved flags. */
+		n->nlmsg_flags = NLM_F_REQUEST|NLM_F_APPEND|NLM_F_CREATE;
+
+		err = rewrite_rtmsg(n, ctx);
+		if (err < 0)
+			goto out_sock_pg;
+		kernel_flag = err;
+
+		iov.iov_base=n;
+		iov.iov_len=nh.nlmsg_len;
+		msg.msg_name=&nladdr;
+		msg.msg_namelen=sizeof(nladdr);
+		msg.msg_iov=&iov;
+		msg.msg_iovlen=1;
+		msg.msg_control=NULL;
+		msg.msg_controllen=0;
+		msg.msg_flags=MSG_DONTWAIT;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_sendmsg(sock, &msg, nh.nlmsg_len);
+		set_fs(oldfs);
+
+		if (err < 0)
+			goto out_sock_pg;
+		err = 0;
+
+		/* Reuse the same page and msghdr to read the ack back;
+		 * a positive return is the received length. */
+		iov.iov_base=pg;
+		iov.iov_len=PAGE_SIZE;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT);
+		set_fs(oldfs);
+		if (err != -EAGAIN) {
+			if (err == NLMSG_LENGTH(sizeof(struct nlmsgerr)) &&
+			    n->nlmsg_type == NLMSG_ERROR) {
+				struct nlmsgerr *e = NLMSG_DATA(n);
+				/* -EEXIST is fine for kernel-owned routes. */
+				if (e->error != -EEXIST || !kernel_flag)
+					eprintk_ctx("NLMERR: %d\n", e->error);
+			} else {
+				eprintk_ctx("Res: %d %d\n", err, n->nlmsg_type);
+			}
+		}
+		err = 0;
+		sec += NLMSG_ALIGN(nh.nlmsg_len);
+	}
+
+out_sock_pg:
+	free_page((unsigned long)pg);
+out_sock:
+	sock_release(sock);
+	return err;
+}
+
+/* Re-enable networking for the VE being restored (it was disabled at
+ * the start of rst_restore_netdev()).  Returns 0, or -ESRCH when the
+ * VE no longer exists. */
+int rst_resume_network(struct cpt_context *ctx)
+{
+	struct ve_struct *env;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+	env->disable_net = 0;
+	put_ve(env);
+	return 0;
+}
+
+/* Restore network device state from the CPT_SECT_NET_DEVICE section:
+ * put every device back at its saved ifindex (swapping indexes with a
+ * device that currently holds the wanted one, if needed) and restore
+ * the saved interface flags.  Networking of the VE is disabled here and
+ * re-enabled later by rst_resume_network().  Returns 0 or a negative
+ * errno from the image reader; per-device flag errors are only logged. */
+int rst_restore_netdev(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_NET_DEVICE];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_netdev_image di;
+	struct net_device *dev;
+
+	/* Keep the VE offline while its devices are reshuffled. */
+	get_exec_env()->disable_net = 1;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_DEVICE || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int err;
+		struct net_device *dev_new;
+		err = rst_get_object(CPT_OBJ_NET_DEVICE, sec, &di, ctx);
+		if (err)
+			return err;
+		rtnl_lock();
+		dev = __dev_get_by_name(di.cpt_name);
+		if (dev) {
+			if (dev->ifindex != di.cpt_index) {
+				dev_new = __dev_get_by_index(di.cpt_index);
+				if (!dev_new) {
+					/* Wanted index is free: just move
+					 * the device onto it. */
+					write_lock_bh(&dev_base_lock);
+					hlist_del(&dev->index_hlist);
+					if (dev->iflink == dev->ifindex)
+						dev->iflink = di.cpt_index;
+					dev->ifindex = di.cpt_index;
+					hlist_add_head(&dev->index_hlist,
+						dev_index_hash(dev->ifindex,
+							get_exec_env()));
+					write_unlock_bh(&dev_base_lock);
+				} else {
+					/* Wanted index is taken: swap the
+					 * indexes of the two devices. */
+					write_lock_bh(&dev_base_lock);
+					hlist_del(&dev->index_hlist);
+					hlist_del(&dev_new->index_hlist);
+					if (dev_new->iflink == dev_new->ifindex)
+						dev_new->iflink = dev->ifindex;
+					dev_new->ifindex = dev->ifindex;
+					if (dev->iflink == dev->ifindex)
+						dev->iflink = di.cpt_index;
+					dev->ifindex = di.cpt_index;
+					hlist_add_head(&dev->index_hlist,
+						dev_index_hash(dev->ifindex,
+							get_exec_env()));
+					hlist_add_head(&dev_new->index_hlist,
+						dev_index_hash(dev_new->ifindex,
+							get_exec_env()));
+					write_unlock_bh(&dev_base_lock);
+				}
+			}
+			/* Restore saved interface flags (IFF_UP etc.). */
+			if (di.cpt_flags^dev->flags) {
+				err = dev_change_flags(dev, di.cpt_flags);
+				if (err)
+					eprintk_ctx("dev_change_flags err: %d\n", err);
+			}
+		} else {
+			eprintk_ctx("unknown interface 2 %s\n", di.cpt_name);
+		}
+		rtnl_unlock();
+		sec += di.cpt_next;
+	}
+	return 0;
+}
+
+/* Kernel-thread body for iptables restore: redirect stdin to the pipe's
+ * read end, close every other descriptor and exec "iptables-restore -c"
+ * (trying /sbin first, then /usr/sbin).  Returns -1 only when both
+ * execs fail. */
+static int dumpfn(void *arg)
+{
+	int i;
+	int *pfd = arg;
+	char *argv[] = { "iptables-restore", "-c", NULL };
+
+	/* Make the pipe's read end fd 0 (stdin of the exec'ed binary). */
+	if (pfd[0] != 0)
+		sc_dup2(pfd[0], 0);
+
+	/* Close everything else, including stdout/stderr. */
+	for (i=1; i<current->files->max_fds; i++)
+		sc_close(i);
+
+	/* NOTE(review): drops a module reference before exec -- confirm
+	 * it balances the one taken by the thread-creation path. */
+	module_put(THIS_MODULE);
+
+	set_fs(KERNEL_DS);
+	i = sc_execve("/sbin/iptables-restore", argv, NULL);
+	if (i == -ENOENT)
+		i = sc_execve("/usr/sbin/iptables-restore", argv, NULL);
+	eprintk("failed to exec iptables-restore: %d\n", i);
+	return -1;
+}
+
+/* Restore iptables state: spawn an iptables-restore process whose stdin
+ * is fed the rules text stored in the CPT_SECT_NET_IPTABLES section,
+ * then wait for it to exit.  Returns 0 on success (the child's exit
+ * status itself is not checked) or a negative errno. */
+static int rst_restore_iptables(struct cpt_context * ctx)
+{
+	int err;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	int n;
+	struct cpt_section_hdr h;
+	loff_t sec = ctx->sections[CPT_SECT_NET_IPTABLES];
+	loff_t end;
+	int pid;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_IPTABLES || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	/* Empty section: nothing to restore. */
+	if (h.cpt_hdrlen == h.cpt_next)
+		return 0;
+	if (h.cpt_hdrlen > h.cpt_next)
+		return -EINVAL;
+	sec += h.cpt_hdrlen;
+	err = rst_get_object(CPT_OBJ_NAME, sec, &v, ctx);
+	if (err < 0)
+		return err;
+
+	/* pfd[0] becomes the child's stdin; we write rules into pfd[1]. */
+	err = sc_pipe(pfd);
+	if (err < 0)
+		return err;
+	pid = err = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0);
+	if (err < 0)
+		goto out;
+	/* Keep the write end as a struct file, close both raw fds. */
+	f = fget(pfd[1]);
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	ctx->file->f_pos = sec + v.cpt_hdrlen;
+	end = sec + v.cpt_next;
+	/* Feed the stored rules text in small chunks, stopping at the
+	 * first NUL terminator. */
+	do {
+		char *p;
+		char buf[16];
+		mm_segment_t oldfs;
+
+		n = end - ctx->file->f_pos;
+		if (n > sizeof(buf))
+			n = sizeof(buf);
+
+		if (ctx->read(buf, n, ctx))
+			break;
+		if ((p = memchr(buf, 0, n)) != NULL)
+			n = p - buf;
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		f->f_op->write(f, buf, n, &f->f_pos);
+		set_fs(oldfs);
+	} while (ctx->file->f_pos < end);
+
+	fput(f);
+
+	/* Clear any pending signal so the wait below is not interrupted. */
+	clear_tsk_thread_flag(current,TIF_SIGPENDING);
+
+	if ((err = sc_waitx(pid, 0)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+
+	return 0;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	return err;
+}
+
+/* Top-level network restore: devices, addresses, routes, iptables and
+ * conntrack state, in that order, stopping at the first error. */
+int rst_restore_net(struct cpt_context *ctx)
+{
+	int err;
+
+	err = rst_restore_netdev(ctx);
+	if (!err)
+		err = rst_restore_ifaddr(ctx);
+	if (!err)
+		err = rst_restore_route(ctx);
+	if (!err)
+		err = rst_restore_iptables(ctx);
+	if (!err)
+		err = rst_restore_ip_conntrack(ctx);
+	return err;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_pagein.c linux-2.6.9-ve023stab054/kernel/cpt/rst_pagein.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_pagein.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_pagein.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,917 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+#include <linux/vmalloc.h>
+#include <linux/major.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+#include <linux/swapops.h>
+#include <linux/swap.h>
+#include <linux/cpt_image.h>
+
+#ifdef CONFIG_VE
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+#endif
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+#include "cpt_pagein.h"
+
+#include "cpt_syscalls.h"
+
+#define PGIN_MAJOR_MAX 254
+#define PGIN_MAJOR_MIN 202
+
+#define PGIN_TIMEOUT	(60*HZ)
+#define PGIN_HEARTBEAT	(60*HZ)
+#define PGIN_IDLEWAIT	(10*HZ)
+#define PGIN_WAIT	(120*HZ)
+
+/* NOTE(review): not referenced by the code visible in this chunk. */
+static spinlock_t pgin_lock = SPIN_LOCK_UNLOCKED;
+
+/* Fake block device serving lazily restored pages: the swap code reads
+ * from this disk, and each read request is forwarded over the
+ * ctx->pagein_file_out / pagein_file_in channel to the page-in peer. */
+struct pgin_device {
+	int harderror;		/* first fatal channel error, see pgin_read_stat() */
+	spinlock_t queue_lock;	/* protects queue_head and qlen */
+	struct list_head queue_head;	/* requests sent, awaiting replies */
+	struct gendisk *disk;
+	cpt_context_t *ctx;
+	int blksize;
+	u64 bytesize;
+	struct completion *startup;
+	struct completion *end;
+	struct semaphore tx_sem;	/* serializes writes to the out channel */
+	struct timer_list timer;	/* heartbeat, re-armed in pgin_send_req() */
+	int dead;		/* when set, pgin_send_req() fails new requests */
+	int qlen;		/* number of requests on queue_head */
+	int npgs;		/* page count of the last request sent */
+	long reqnum;		/* completed requests, for RTT statistics */
+	long rtt;		/* accumulated round-trip time (jiffies) */
+	int rttmin;
+	int rttmax;
+};
+
+/* Complete @req: first wait (by yielding) until the extra reference
+ * taken in do_pgin_request() has been dropped, then finish the request
+ * under the block queue lock. */
+static void pgin_end_request(struct request *req)
+{
+	int uptodate = (req->errors == 0) ? 1 : 0;
+	request_queue_t *q = req->q;
+	struct pgin_device *d = req->rq_disk->private_data;
+	unsigned long flags;
+
+	/* Spin/yield until we are the only holder of the request. */
+	spin_lock_bh(&d->queue_lock);
+	while (req->ref_count > 1) {
+		spin_unlock_bh(&d->queue_lock);
+		yield();
+		spin_lock_bh(&d->queue_lock);
+	}
+	spin_unlock_bh(&d->queue_lock);
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (!end_that_request_first(req, uptodate, req->nr_sectors))
+		end_that_request_last(req);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/* Fill @req with a synthetic swap-area header ("SWAPSPACE2", version 1,
+ * last_page = lazypages + 1) so the kernel accepts the page-in device
+ * as a valid swap device.  Returns 0 or -ENOMEM. */
+int pgin_prepare_swap_signature(struct pgin_device *d, struct request *req)
+{
+	unsigned long pg;
+	char *ptr;
+	union swap_header *swh;
+	struct bio *bio;
+
+	pg = __get_free_page(GFP_KERNEL);
+	if (!pg)
+		return -ENOMEM;
+
+	/* Build the swap header in a scratch page. */
+	swh = (union swap_header *)pg;
+	memset(swh, 0, PAGE_SIZE);
+	memcpy(swh->magic.magic, "SWAPSPACE2", 10);
+	swh->info.version = 1;
+	swh->info.last_page = d->ctx->lazypages + 1;
+	swh->info.nr_badpages = 0;
+
+	/* Copy it into the request's bio segments. */
+	ptr = (char*)pg;
+	rq_for_each_bio(bio, req) {
+		int i;
+		struct bio_vec *bvec;
+		bio_for_each_segment(bvec, bio, i) {
+			void *kaddr = kmap(bvec->bv_page);
+			memcpy(kaddr + bvec->bv_offset, ptr, bvec->bv_len);
+			ptr += bvec->bv_len;
+			kunmap(bvec->bv_page);
+		}
+	}
+
+	free_page(pg);
+	return 0;
+}
+
+#if 0
+/* Disabled debug path: satisfy the request directly from the dump image
+ * (via ctx->lazytable offsets) instead of the page-in channel.
+ * Returns 1 on success, -EIO on a read error. */
+int pgin_prepare_fake(struct pgin_device *d, struct request *req, int index)
+{
+	int pos = 0;
+	struct bio *bio;
+	cpt_context_t *ctx = d->ctx;
+
+	rq_for_each_bio(bio, req) {
+		int i;
+		struct bio_vec *bvec;
+		bio_for_each_segment(bvec, bio, i) {
+			int err;
+			loff_t off;
+			void *kaddr = kmap(bvec->bv_page);
+			off = ctx->lazytable[index + (pos/PAGE_SIZE)].mm_id;
+			err = ctx->pread(kaddr + bvec->bv_offset, bvec->bv_len, d->ctx, off);
+			kunmap(bvec->bv_page);
+			if (err) {
+				eprintk_ctx("pgin read err %d @ %d\n", err, pos);
+				return -EIO;
+			}
+			pos += bvec->bv_len;
+		}
+	}
+
+	return 1;
+}
+#endif
+
+/* Read exactly @len bytes from @file into @buf, looping over short
+ * reads.  Returns 0 on success, -EIO on premature EOF, or a negative
+ * errno from vfs_read(). */
+static int nread(struct file *file, char *buf, int len)
+{
+	int offset = 0;
+
+	while (offset < len) {
+		int res;
+		res = vfs_read(file, buf+offset, len-offset, &file->f_pos);
+		if (res < 0)
+			return res;
+		if (res == 0)
+			return -EIO;
+		offset += res;
+	}
+	return 0;
+}
+
+/* Translate block request @req into a pgin_request and send it down the
+ * page-in channel.  Page 0 of the fake swap device is the swap
+ * signature and is synthesized locally.  Returns 1 when the request was
+ * completed locally, 0 otherwise; protocol errors bump req->errors. */
+int pgin_send_req(struct pgin_device *d, struct request *req)
+{
+	mm_segment_t oldfs;
+	int result;
+	struct pgin_request request;
+	unsigned long size = req->nr_sectors << 9;
+	cpt_context_t *ctx = d->ctx;
+	struct file *file = d->ctx->pagein_file_out;
+	int index;
+	int npgs;
+
+	d->ctx->last_pagein = jiffies;
+
+//	dprintk_ctx("pgin: sec=%lu len=%lu\n", req->sector, req->nr_sectors);
+
+	/* Only whole-page transfers are supported. */
+	if (size % PAGE_SIZE) {
+		eprintk_ctx("pgin: size %% PAGE_SIZE\n");
+		goto error_out;
+	}
+
+	npgs = size / PAGE_SIZE;
+	d->npgs = npgs;
+
+	/* Sector -> page index on the fake swap device. */
+	index = (req->sector << 9) / PAGE_SIZE;
+	if (index == 0) {
+		/* Page 0 is the swap signature; serve it locally. */
+		if (npgs != 1) {
+			eprintk_ctx("pgin: signature npgs=%d\n", npgs);
+			goto error_out;
+		}
+		if (pgin_prepare_swap_signature(d, req))
+			goto error_out;
+		req->errors = 0;
+		return 1;
+	}
+	/* Data pages start at page 1 on the device. */
+	index--;
+
+	if (index + npgs > d->ctx->lazypages) {
+		eprintk_ctx("pgin: out of range: %d %d %d\n", index, npgs, d->ctx->lazypages);
+		dump_stack();
+		goto error_out;
+	}
+
+#if 0
+	if (pgin_prepare_fake(d, req, index) < 0)
+		goto error_out;
+	req->errors = 0;
+	return 1;
+#endif
+
+	if (file == NULL) {
+		eprintk_ctx("pagein: file == NULL\n");
+		goto error_out;
+	}
+	if (d->dead) {
+		eprintk_ctx("pagein: dead\n");
+		goto error_out;
+	}
+
+	mod_timer(&d->timer, jiffies + PGIN_HEARTBEAT);
+
+	request.rmid = PGIN_RMID;
+	request.size = npgs;
+	request.index = index;
+	/* The request pointer itself is the reply-matching handle,
+	 * looked up again in pgin_find_request(). */
+	memcpy(&request.handle, &req, sizeof(req));
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	down(&d->tx_sem);
+	result = vfs_write(file, (char*)&request, sizeof(request), &file->f_pos);
+	up(&d->tx_sem);
+	set_fs(oldfs);
+
+	if (result != sizeof(request)) {
+		eprintk_ctx("pagein: damn %d\n", result);
+		goto error_out;
+	}
+	return 0;
+
+error_out:
+	req->errors++;
+	return 0;
+}
+
+/* Send the PGIN_START handshake down the out channel and wait for the
+ * peer's reply on the in channel.  Returns 0 on success or a negative
+ * errno. */
+static int pagein_send_start(cpt_context_t *ctx)
+{
+	int result;
+	struct pgin_request request;
+	struct pgin_reply reply;
+	struct file *file = ctx->pagein_file_out;
+
+	if (file == NULL)
+		return -ENOENT;
+
+	request.rmid = PGIN_RMID;
+	request.size = PGIN_START;
+	request.index = 0;
+	memset(&request.handle, 0, sizeof(request.handle));
+
+	result = vfs_write(file, (char*)&request, sizeof(request), &file->f_pos);
+	if (result < 0)
+		return result;
+	if (result != sizeof(request))
+		return -EIO;
+	/* Synchronous handshake: wait for the peer's acknowledgement. */
+	result = nread(ctx->pagein_file_in, (char*)&reply, sizeof(reply));
+	if (result)
+		return result;
+	if (reply.rmid != PGIN_RMID)
+		return -EIO;
+	return 0;
+}
+
+/* Send the PGIN_STOP message down the out channel; no reply is read.
+ * Returns 0 on success or a negative errno. */
+static int pagein_send_stop(cpt_context_t *ctx)
+{
+	int result;
+	struct pgin_request request;
+	struct file *file = ctx->pagein_file_out;
+
+	if (file == NULL)
+		return -ENOENT;
+
+	request.rmid = PGIN_RMID;
+	request.size = PGIN_STOP;
+	request.index = 0;
+	memset(&request.handle, 0, sizeof(request.handle));
+
+	result = vfs_write(file, (char*)&request, sizeof(request), &file->f_pos);
+	if (result < 0)
+		return result;
+
+	return result == sizeof(request) ? 0 : -EIO;
+}
+
+
+/* Look up the pending request whose pointer matches @handle (the value
+ * sent in pgin_send_req()), unlink it from the queue and return it.
+ * Returns NULL when no pending request matches. */
+static struct request *pgin_find_request(struct pgin_device *d, u64 *handle)
+{
+	struct request *req;
+	struct request *xreq;
+
+	/* Decode the request pointer back out of the wire handle. */
+	memcpy(&xreq, handle, sizeof(xreq));
+
+	spin_lock_bh(&d->queue_lock);
+	list_for_each_entry(req, &d->queue_head, queuelist) {
+		if (req != xreq)
+			continue;
+		list_del_init(&req->queuelist);
+		d->qlen--;
+		spin_unlock_bh(&d->queue_lock);
+		return req;
+	}
+	spin_unlock_bh(&d->queue_lock);
+	return NULL;
+}
+
+/* Complete every request still pending on the queue.  The lock is
+ * dropped for each completion (pgin_end_request() may sleep/yield), so
+ * the iteration restarts from the list head every time. */
+static void pgin_flush_requests(struct pgin_device *d)
+{
+	struct request *req;
+
+again:
+	spin_lock_bh(&d->queue_lock);
+	list_for_each_entry(req, &d->queue_head, queuelist) {
+		list_del_init(&req->queuelist);
+		d->qlen--;
+		spin_unlock_bh(&d->queue_lock);
+
+		pgin_end_request(req);
+		goto again;
+	}
+	spin_unlock_bh(&d->queue_lock);
+}
+
+
+/* Read one bio segment's worth of data from @file into the segment's
+ * page via a temporary kernel mapping.  Returns 0 or a negative errno
+ * from nread(). */
+static inline int nread_bvec(struct file *file, struct bio_vec *bvec)
+{
+	int result;
+	void *kaddr = kmap(bvec->bv_page);
+	result = nread(file, kaddr + bvec->bv_offset, bvec->bv_len);
+	kunmap(bvec->bv_page);
+	return result;
+}
+
+/* Read one reply from the page-in channel, match it to its pending
+ * request, copy the page data into the request's bio segments and
+ * update the round-trip-time statistics.  Returns the completed
+ * request (with req->errors bumped on a peer-side error), or NULL on a
+ * hard channel error, which is recorded in d->harderror. */
+struct request *pgin_read_stat(struct pgin_device *d)
+{
+	int result;
+	int rtt;
+	struct pgin_reply reply;
+	struct request *req;
+	struct bio *bio;
+	cpt_context_t *ctx = d->ctx;
+
+	if (d->ctx->pagein_file_in == NULL) {
+		result = -EINVAL;
+		goto out;
+	}
+
+	result = nread(d->ctx->pagein_file_in, (char*)&reply, sizeof(reply));
+	/* nread was interruped by SIGKILL, so just exit */
+	if (result == -ERESTARTSYS)
+		goto out;
+	if (result < 0) {
+		eprintk_ctx("pgin_read_stat: nread: %d\n", result);
+		goto out;
+	}
+//	dprintk_ctx("pgin: repl\n");
+
+	req = pgin_find_request(d, &reply.handle);
+	if (req == NULL) {
+		/* Reply does not match any pending request. */
+		result = -EBADR;
+		eprintk_ctx("pgin: EBADR\n");
+		goto out;
+	}
+
+	if (reply.error) {
+		/* Peer-side failure: complete the request with an error. */
+		req->errors++;
+		eprintk_ctx("pgin: ERR\n");
+		return req;
+	}
+
+	/* Stream the page data straight into the request's segments. */
+	rq_for_each_bio(bio, req) {
+		int i;
+		struct bio_vec *bvec;
+		bio_for_each_segment(bvec, bio, i) {
+			result = nread_bvec(d->ctx->pagein_file_in, bvec);
+			if (result < 0) {
+				eprintk_ctx("pgin_read_stat: nread_bvec: %d\n", result);
+				goto out;
+			}
+		}
+	}
+	/* Account round-trip time in jiffies for min/avg/max statistics. */
+	rtt = jiffies - req->start_time;
+	if (!d->rtt) {
+		d->rtt = d->rttmin = d->rttmax = rtt;
+		d->reqnum = 1;
+	} else {
+		d->rtt += rtt;
+		d->reqnum++;
+		if (rtt < d->rttmin)
+			d->rttmin = rtt;
+		if (rtt > d->rttmax)
+			d->rttmax = rtt;
+	}
+	return req;
+
+out:
+	d->harderror = result;
+	return NULL;
+}
+
+/* Block layer request function for the pgin device.  For each queued
+ * read request: park it on the device's private queue, ship a request
+ * record to the peer (pgin_send_req), and leave completion to the reader
+ * thread (pgin_read_stat).  Writes are rejected outright.
+ *
+ * Locking: entered with q->queue_lock held and irqs off (block layer
+ * contract); the lock is dropped around the blocking send and re-taken
+ * before looping.  ref_count is bumped while the request is both on our
+ * queue and being sent, so a concurrent completion cannot free it. */
+static void do_pgin_request(request_queue_t * q)
+{
+	struct request *req;
+
+	while ((req = elv_next_request(q)) != NULL) {
+		struct pgin_device *d;
+		cpt_context_t *ctx;
+
+		blkdev_dequeue_request(req);
+
+		d = req->rq_disk->private_data;
+		ctx = d->ctx;
+
+		if (rq_data_dir(req) == WRITE) {
+			eprintk_ctx("Write on pgin device\n");
+			goto error_out;
+		}
+
+		req->errors = 0;
+		spin_unlock_irq(q->queue_lock);
+
+		/* Queue before sending so the reader thread can already
+		 * match the reply against this request. */
+		spin_lock_bh(&d->queue_lock);
+		list_add(&req->queuelist, &d->queue_head);
+		d->qlen++;
+		req->ref_count++;
+		spin_unlock_bh(&d->queue_lock);
+
+		/* Send failed: take the request back and fail it. */
+		if (pgin_send_req(d, req)) {
+			spin_lock_bh(&d->queue_lock);
+			list_del_init(&req->queuelist);
+			d->qlen--;
+			req->ref_count--;
+			spin_unlock_bh(&d->queue_lock);
+
+			pgin_end_request(req);
+
+			spin_lock_irq(q->queue_lock);
+			continue;
+		}
+
+		if (req->errors) {
+			spin_lock_bh(&d->queue_lock);
+			list_del_init(&req->queuelist);
+			req->ref_count--;
+			d->qlen--;
+			spin_unlock_bh(&d->queue_lock);
+
+			pgin_end_request(req);
+
+			spin_lock_irq(q->queue_lock);
+			continue;
+		}
+
+		/* Sent OK: drop our extra reference; the reader thread
+		 * owns completion from here on. */
+		spin_lock_bh(&d->queue_lock);
+		req->ref_count--;
+		spin_unlock_bh(&d->queue_lock);
+
+		spin_lock_irq(q->queue_lock);
+		continue;
+
+error_out:
+		req->errors++;
+		spin_unlock_irq(q->queue_lock);
+
+		pgin_end_request(req);
+
+		spin_lock_irq(q->queue_lock);
+	}
+	return;
+}
+
+/* Block device methods: nothing beyond module ownership -- the device is
+ * driven entirely through its request queue (do_pgin_request). */
+static struct block_device_operations pgin_fops =
+{
+	.owner =	THIS_MODULE,
+};
+
+/* Watchdog timer for the page-in device.  Scans the queue of outstanding
+ * requests; if the oldest one has been pending longer than PGIN_TIMEOUT
+ * jiffies, the VM transfer is declared stuck: the device is marked dead
+ * and the pgin reader thread is killed (which tears down the VE's lazy
+ * restore).  Otherwise the timer is re-armed PGIN_HEARTBEAT ahead. */
+static void heartbeat_timer(unsigned long arg)
+{
+	cpt_context_t *ctx = (cpt_context_t *)arg;
+	struct pgin_device *d = ctx->pagein_dev;
+	unsigned long oldest;
+	struct request *req;
+
+	if (d->dead)
+		return;
+
+	/* Find the start time of the oldest request still queued. */
+	oldest = jiffies;
+	spin_lock(&d->queue_lock);
+	list_for_each_entry(req, &d->queue_head, queuelist) {
+		if (time_before(req->start_time, oldest))
+			oldest = req->start_time;
+	}
+	spin_unlock(&d->queue_lock);
+
+	/* Bug fix: the original tested (jiffies - jiffies), which is
+	 * always 0, so the timeout below could never fire and 'oldest'
+	 * was computed but unused.  Compare against the oldest pending
+	 * request's age instead. */
+	if ((long)(jiffies - oldest) > PGIN_TIMEOUT) {
+		eprintk_ctx("Virtual memory transfer is stuck; killing VE\n");
+
+		d->dead = 1;
+		if (ctx->pgin_task) {
+			read_lock(&tasklist_lock);
+			if (!ctx->pgin_task->exit_state)
+				send_sig(SIGKILL, ctx->pgin_task, 1);
+			read_unlock(&tasklist_lock);
+			dprintk_ctx("rst: pgin alarm... ");
+			put_task_struct(ctx->pgin_task);
+			ctx->pgin_task = NULL;
+		}
+	} else {
+		mod_timer(&d->timer, jiffies + PGIN_HEARTBEAT);
+	}
+}
+
+/* Allocate and register the pgin block device: a pgin_device with its
+ * private request queue, a gendisk, and a dynamically chosen major in
+ * [PGIN_MAJOR_MIN, PGIN_MAJOR_MAX] (highest free one wins).  On success
+ * ctx->pagein_dev is set and /dev node creation is left to plugin_swap.
+ * On failure everything registered so far is torn down; note that
+ * ctx->pagein_dev itself stays allocated and is freed later by
+ * pagein_dev_cleanup(). */
+static int pagein_dev_init(cpt_context_t *ctx)
+{
+	int err = -ENOMEM;
+	struct gendisk *disk;
+	struct pgin_device *d;
+
+	d = kmalloc(sizeof(*d), GFP_KERNEL);
+	if (d == NULL)
+		return -ENOMEM;
+
+	memset(d, 0, sizeof(*d));
+	ctx->pagein_dev = d;
+	/* Start at 1 so pagein_info_printf never divides by zero. */
+	d->reqnum = 1;
+
+	disk = alloc_disk(1);
+	if (!disk)
+		goto out;
+	d->disk = disk;
+	disk->queue = blk_init_queue(do_pgin_request, &pgin_lock);
+	if (!disk->queue)
+		goto out;
+#if 0
+	/* Play with queue. We do not use this right now, but I leave
+	 * the dead chunk here to remember, what we can do if we need to.
+	 */
+	disk->queue->unplug_thresh = 1;
+	disk->queue->unplug_delay = 0;
+#endif
+
+	/* Grab the highest currently free major in our range. */
+	disk->major = PGIN_MAJOR_MAX;
+	while (disk->major >= PGIN_MAJOR_MIN) {
+		err = register_blkdev(disk->major, "pgin");
+		if (err == 0)
+			break;
+		disk->major--;
+	}
+	if (disk->major < PGIN_MAJOR_MIN) {
+		disk->major = 0;
+		goto out;
+	}
+
+	spin_lock_init(&d->queue_lock);
+	INIT_LIST_HEAD(&d->queue_head);
+	d->blksize = PAGE_SIZE;
+	/* Maximal size: ~2TB.  Capacity below is the same value expressed
+	 * in 512-byte sectors (bytesize >> 9). */
+	d->bytesize = 0x7ffffc00ULL << 10;
+	d->ctx = ctx;
+	d->end = &ctx->pgin_notify;
+	init_MUTEX(&d->tx_sem);
+	init_timer(&d->timer);
+	d->timer.function = heartbeat_timer;
+	d->timer.data = (unsigned long)ctx;
+	disk->first_minor = 0;
+	disk->fops = &pgin_fops;
+	disk->private_data = d;
+	disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
+	sprintf(disk->disk_name, "pgin%d", disk->major);
+	set_capacity(disk, 0x7ffffc00ULL << 1);
+	add_disk(disk);
+
+	return 0;
+
+out:
+	if (d->disk) {
+		if (d->disk->major)
+			unregister_blkdev(d->disk->major, "pgin");
+		if (d->disk->queue)
+			blk_cleanup_queue(d->disk->queue);
+		put_disk(d->disk);
+	}
+	return err;
+}
+
+/* Undo pagein_dev_init(): remove the gendisk, release queue and disk,
+ * unregister the major, and free the pgin_device.  Safe to call when no
+ * device was ever set up (ctx->pagein_dev == NULL). */
+static void pagein_dev_cleanup(cpt_context_t *ctx)
+{
+	struct gendisk *disk;
+
+	if (ctx->pagein_dev) {
+		disk = ctx->pagein_dev->disk;
+		if (disk) {
+			int major = disk->major;
+			del_gendisk(disk);
+			blk_cleanup_queue(disk->queue);
+			put_disk(disk);
+			if (major)
+				unregister_blkdev(major, "pgin");
+		}
+		kfree(ctx->pagein_dev);
+		ctx->pagein_dev = NULL;
+	}
+}
+
+/* Activate the pgin block device as a read-only swap area: create a
+ * temporary /dev/pginN node, swapon it, then locate the resulting
+ * swap_info entry by major number and disable further allocation on it
+ * (lowest_bit = max, highest_bit = 0) -- the area only serves page-ins
+ * of the restored VE's swapped-out memory.  The device node is unlinked
+ * again on all exits; ctx->pgin_swp / ctx->lazytype record the area.
+ * Runs with KERNEL_DS so the sys_* calls accept kernel pointers. */
+static int plugin_swap(cpt_context_t * ctx)
+{
+	int major;
+	int err;
+	int type;
+	mm_segment_t oldfs;
+	char devname[16];
+
+	major = ctx->pagein_dev->disk->major;
+	sprintf(devname, "/dev/pgin%d", major);
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+
+	sys_unlink(devname);
+	err = sys_mknod(devname, 0600|S_IFBLK, major<<8);
+	if (err) {
+		eprintk_ctx("sys_mknod %d\n", err);
+		goto out;
+	}
+
+	err = sys_swapon(devname, SWAP_FLAG_READONLY);
+	if (err) {
+		eprintk_ctx("sys_swapon %d\n", err);
+		goto out;
+	}
+
+	/* Find the swap_info slot that swapon just created for us. */
+	swap_list_lock();
+	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
+		struct swap_info_struct * p;
+		p = swap_info + type;
+		if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) {
+			if (imajor(p->swap_file->f_dentry->d_inode) == major) {
+				p->lowest_bit = p->max;
+				p->highest_bit = 0;
+				ctx->pgin_swp = p;
+				break;
+			}
+		}
+	}
+	swap_list_unlock();
+	if (type < 0) {
+		eprintk_ctx("swapfile has been lost\n");
+		err = -EFAULT;
+		goto out;
+	}
+	ctx->lazytype = type;
+
+out:
+	sys_unlink(devname);
+	set_fs(oldfs);
+	return err;
+}
+
+/* Reader kernel thread ("pgind"): signals startup completion, then loops
+ * completing requests as replies arrive from the peer.  When
+ * pgin_read_stat() returns NULL (hard error or SIGKILL) the device is
+ * marked dead, pending requests are flushed, the watchdog timer is
+ * stopped, and ctx->pgin_notify is completed so rst_complete_pagein()
+ * can finish tearing down. */
+static int pgin_thread(void *arg)
+{
+	struct request *req;
+	struct cpt_context *ctx = arg;
+	struct pgin_device *d;
+
+	daemonize("pgind");
+	allow_signal(SIGKILL);
+	set_fs(KERNEL_DS);
+
+	d = ctx->pagein_dev;
+
+	if (d->startup)
+		complete(d->startup);
+
+	while ((req = pgin_read_stat(d)) != NULL)
+		pgin_end_request(req);
+
+	d->dead = 1;
+	pgin_flush_requests(d);
+	del_timer_sync(&d->timer);
+	if (d->end)
+		complete(d->end);
+	return 0;
+}
+
+/* Workqueue callback that spawns the pgin thread from keventd context
+ * (so the thread inherits clean kernel-thread credentials instead of the
+ * ioctl caller's).  Stores the resulting pid (or negative errno) in
+ * info->pid and signals info->done. */
+static void rst_start_pagein_worker(void *_info)
+{
+	pagein_info_t *info;
+	
+	info = (pagein_info_t *)_info;
+	info->pid = kernel_thread(pgin_thread, info->ctx,
+			CLONE_KERNEL | CLONE_VM | SIGCHLD);
+	complete(&info->done);
+}
+
+/* Set up the whole lazy page-in machinery for a restore that announced
+ * lazy pages: handshake with the peer (pagein_send_start), create the
+ * pgin block device, start the reader thread via the workqueue, pin its
+ * task_struct in ctx->pgin_task, wait for the thread to come up, and
+ * finally activate the device as read-only swap (plugin_swap).
+ * No-op (returns 0) when ctx->lazypages == 0. */
+int rst_setup_pagein(cpt_context_t *ctx)
+{
+	struct completion comp;
+	mm_segment_t oldfs;
+	pagein_info_t create;
+	DECLARE_WORK(work, rst_start_pagein_worker, &create);
+	int err;
+
+	if (ctx->lazypages == 0)
+		return 0;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	err = pagein_send_start(ctx);
+	set_fs(oldfs);
+	if (err) {
+		eprintk_ctx("rst_setup_pagein: pagein_send_start: %d\n", err);
+		return err;
+	}
+	ctx->last_pagein = jiffies;
+
+	err = pagein_dev_init(ctx);
+	if (err) {
+		eprintk_ctx("pagein_dev_init\n");
+		return err;
+	}
+
+	init_completion(&comp);
+	ctx->pagein_dev->startup = &comp;
+
+	/* Spawn the reader thread from keventd, not from this task. */
+	create.pid = -EINVAL;
+	create.ctx = ctx;
+	init_completion(&create.done);
+	schedule_work(&work);
+	wait_for_completion(&create.done);
+
+	if (create.pid < 0) {
+		eprintk_ctx("kernel_thread pgin\n");
+		return create.pid;
+	}
+
+	/* Hold a reference on the thread so we can signal it later. */
+	read_lock(&tasklist_lock);
+	ctx->pgin_task = find_task_by_pid_all(create.pid);
+	if (ctx->pgin_task)
+		get_task_struct(ctx->pgin_task);
+	read_unlock(&tasklist_lock);
+	if (ctx->pgin_task == NULL) {
+		eprintk_ctx("pgin task is lost\n");
+		return -ESRCH;
+	}
+	dprintk_ctx("pgin pid=%d\n", create.pid);
+
+	wait_for_completion(&comp);
+	ctx->pagein_dev->startup = NULL;
+
+	err = plugin_swap(ctx);
+	if (err) {
+		eprintk_ctx("rst_setup_pagein: plugin_swap: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+/* CPT_PAGEIND ioctl backend: hand control back to the restore engine
+ * (by releasing main_sem) until the pagein phase signals
+ * pgind_completion, then re-acquire the semaphore.  Fails with -EINVAL
+ * when no lazy swap area was plugged in. */
+int rst_pageind(cpt_context_t *ctx)
+{
+	struct completion comp;
+
+	if (!ctx->pgin_swp)
+		return -EINVAL;
+
+	init_completion(&comp);
+	ctx->pgind_completion = &comp;
+	up(&ctx->main_sem);
+	wait_for_completion(&comp);
+	down(&ctx->main_sem);
+	ctx->pgind_completion = NULL;
+	return 0;
+}
+
+/* Last-resort swapoff loop used when the normal swapoff of the pgin
+ * device failed (pages could not be fetched from the source node):
+ * keep retrying sys_swapoff until it succeeds or reports -EINVAL (area
+ * already gone).  On each failed round the VE's init is SIGKILLed and we
+ * wait, with exponential backoff up to 1s, for all VE tasks to exit so
+ * the pages they pinned are released.  A pending signal on the current
+ * task is temporarily cleared so sys_swapoff is not interrupted, and
+ * restored before returning. */
+int rst_complete_pagein_swapoff(cpt_context_t *ctx, char *devname, int major)
+{
+	int err;
+	int sig_pending = 0;
+
+	do {
+		sys_mknod(devname, 0600|S_IFBLK, major<<8);
+
+		err = sys_swapoff(devname);
+
+		if (err && err != -EINVAL) {
+			struct ve_struct *env;
+
+			if (signal_pending(current)) {
+				clear_tsk_thread_flag(current, TIF_SIGPENDING);
+				sig_pending = 1;
+			}
+
+			env = get_ve_by_id(ctx->ve_id);
+			if (env) {
+				int delay;
+				send_sig(SIGKILL, env->init_entry, 1);
+				delay = 1;
+				/* Wait until only the VE struct's own
+				 * reference remains, i.e. all tasks died. */
+				while (atomic_read(&env->counter) != 1) {
+					current->state = TASK_INTERRUPTIBLE;
+					delay = (delay < HZ) ? (delay << 1) : HZ;
+					schedule_timeout(delay);
+				}
+				put_ve(env);
+			}
+		}
+		sys_unlink(devname);
+	} while (err && err != -EINVAL);
+
+	if (sig_pending)
+		set_tsk_thread_flag(current, TIF_SIGPENDING);
+
+	return err;
+}
+
+/* Finish (or abort, if 'kill' is set) the lazy page-in phase:
+ *  1. unless killing, idle-wait while transfers are still arriving
+ *     (bounded by PGIN_IDLEWAIT of inactivity and PGIN_WAIT total);
+ *  2. swapoff the pgin device, which forces all remaining lazy pages to
+ *     be pulled in ("suck vm"), escalating to the kill-and-retry helper
+ *     on failure;
+ *  3. tell the peer to stop, wake anyone blocked in rst_pageind(),
+ *     kill the reader thread and wait for it to signal pgin_notify;
+ *  4. tear down the block device.
+ * Always returns 0. */
+int rst_complete_pagein(cpt_context_t *ctx, int kill)
+{
+	int err;
+	mm_segment_t oldfs;
+
+	rst_drop_iter_dir(ctx);
+
+	if (!kill && ctx->pgin_swp) {
+		ctx->last_pagein = jiffies;
+		while ((long)(jiffies - ctx->last_pagein) < PGIN_IDLEWAIT &&
+		       (long)(jiffies - (unsigned long)ctx->cpt_jiffies64) < PGIN_WAIT) {
+			current->state = TASK_INTERRUPTIBLE;
+			schedule_timeout(PGIN_IDLEWAIT);
+		}
+	}
+
+	if (ctx->pgin_swp) {
+		char devname[16];
+		int major = ctx->pagein_dev->disk->major;
+
+		dprintk_ctx("going to suck vm...\n");
+		ctx->pgin_swp = NULL;
+
+		sprintf(devname, "/dev/pgin%d", major);
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		sys_unlink(devname);
+		sys_mknod(devname, 0600|S_IFBLK, major<<8);
+		err = rst_swapoff(ctx);
+		if (!err)
+			err = sys_swapoff(devname);
+
+		sys_unlink(devname);
+
+		if (err && err != -EINVAL) {
+			eprintk_ctx("cannot get vm pages from source node: %d\n", err);
+			rst_complete_pagein_swapoff(ctx, devname, major);
+		}
+
+		set_fs(oldfs);
+		dprintk_ctx("...sucked\n");
+
+		if (err)
+			eprintk_ctx("sys_swapoff: %d\n", err);
+	}
+
+	if (ctx->pagein_file_out) {
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		pagein_send_stop(ctx);
+		set_fs(oldfs);
+	}
+
+	if (ctx->pgind_completion)
+		complete(ctx->pgind_completion);
+
+	if (ctx->pgin_task) {
+		read_lock(&tasklist_lock);
+		if (!ctx->pgin_task->exit_state)
+			send_sig(SIGKILL, ctx->pgin_task, 1);
+		read_unlock(&tasklist_lock);
+		put_task_struct(ctx->pgin_task);
+		ctx->pgin_task = NULL;
+		dprintk_ctx("rst: wfc pgin_notify... ");
+		wait_for_completion(&ctx->pgin_notify);
+		dprintk_ctx("done\n");
+	}
+	pagein_dev_cleanup(ctx);
+	return 0;
+}
+
+/* Append page-in statistics (queue length, mean/min/max RTT in jiffies,
+ * page count) to 'buf' for the /proc/rst listing.  Returns the number of
+ * bytes written (0 when no pagein device exists).  reqnum starts at 1 in
+ * pagein_dev_init, so the division is safe.  min/max are reset after
+ * each read so every sample covers one reporting interval.
+ * NOTE(review): the reset constant 0x7FFFFFF has 7 F's -- presumably
+ * 0x7FFFFFFF was intended; harmless unless RTT exceeds ~134M jiffies. */
+int pagein_info_printf(char *buf, cpt_context_t *ctx)
+{
+	int len = 0;
+
+	if (ctx->pagein_dev) {
+		len = sprintf(buf, " %d %d %d %d %d", 
+			       ctx->pagein_dev->qlen,
+			       (int)ctx->pagein_dev->rtt/(int)ctx->pagein_dev->reqnum,
+			       ctx->pagein_dev->rttmin,
+			       ctx->pagein_dev->rttmax,
+			       ctx->pagein_dev->npgs);
+		ctx->pagein_dev->rttmin = 0x7FFFFFF;
+		ctx->pagein_dev->rttmax = 0;
+	}
+
+	return len;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_proc.c linux-2.6.9-ve023stab054/kernel/cpt/rst_proc.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_proc.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_proc.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,596 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_ioctl.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_dump.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+
+MODULE_AUTHOR("Alexey Kuznetsov <alexey@sw.ru>");
+MODULE_LICENSE("GPL v2");
+
+/* List of contexts and lock protecting the list */
+struct list_head cpt_context_list;
+spinlock_t cpt_context_lock;
+
+/* read_proc handler for /proc/rst: one header line plus one line per
+ * live context (pointer, id, VE id, state, pagein stats).  Implements
+ * the classic procfs read_proc windowing protocol via pos/begin/*start,
+ * regenerating the listing on each call and returning only the slice
+ * [offset, offset+length). */
+static int proc_read(char *buffer, char **start, off_t offset,
+		     int length, int *eof, void *data)
+{
+	off_t pos = 0;
+	off_t begin = 0;
+	int len = 0;
+	cpt_context_t *ctx;
+
+	len += sprintf(buffer, "Ctx      Id       VE       State\n");
+
+	spin_lock(&cpt_context_lock);
+
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		len += sprintf(buffer+len,"%p %08x %-8u %d",
+			       ctx,
+			       ctx->contextid,
+			       ctx->ve_id,
+			       ctx->ctx_state
+			       );
+		len += pagein_info_printf(buffer+len, ctx);
+
+		buffer[len++] = '\n';
+
+		pos = begin+len;
+		if (pos < offset) {
+			/* Everything so far precedes the window; discard. */
+			len = 0;
+			begin = pos;
+		}
+		if (pos > offset+length)
+			goto done;
+	}
+	/* Reached the end of the list: no more data after this window. */
+	*eof = 1;
+
+done:
+	spin_unlock(&cpt_context_lock);
+	*start = buffer + (offset - begin);
+	len -= (offset - begin);
+	if(len > length)
+		len = length;
+	if(len < 0)
+		len = 0;
+	return len;
+}
+
+/* Final teardown of a restore context once its refcount hits zero.
+ * Called with cpt_context_lock held; the lock is dropped for the heavy
+ * work (which may sleep) and re-acquired before returning so the caller
+ * (__cpt_context_put) keeps a consistent locking picture.  Resumes the
+ * VE if the context was left mid-restore, closes the dump file, frees
+ * anon-VMA hash, error buffers, pagein pipes, file jobs, pinned pgin
+ * task and vdso page, then frees the context itself. */
+void rst_context_release(cpt_context_t *ctx)
+{
+	list_del(&ctx->ctx_list);
+	spin_unlock(&cpt_context_lock);
+
+	if (ctx->ctx_state > 0)
+		rst_resume(ctx);
+	ctx->ctx_state = CPT_CTX_ERROR;
+
+	rst_close_dumpfile(ctx);
+
+	if (ctx->anonvmas) {
+		int h;
+		for (h = 0; h < CPT_ANONVMA_HSIZE; h++) {
+			while (!hlist_empty(&ctx->anonvmas[h])) {
+				struct hlist_node *elem = ctx->anonvmas[h].first;
+				hlist_del(elem);
+				kfree(elem);
+			}
+		}
+		free_page((unsigned long)ctx->anonvmas);
+	}
+	cpt_flush_error(ctx);
+	if (ctx->errorfile) {
+		fput(ctx->errorfile);
+		ctx->errorfile = NULL;
+	}
+	if (ctx->error_msg) {
+		free_page((unsigned long)ctx->error_msg);
+		ctx->error_msg = NULL;
+	}
+	rst_drop_iter_dir(ctx);
+	if (ctx->pagein_file_out)
+		fput(ctx->pagein_file_out);
+	if (ctx->pagein_file_in)
+		fput(ctx->pagein_file_in);
+	if (ctx->filejob_queue)
+		rst_flush_filejobs(ctx);
+	if (ctx->pgin_task)
+		put_task_struct(ctx->pgin_task);
+	if (ctx->vdso) {
+		free_page((unsigned long)ctx->vdso);
+	}
+	if (ctx->objcount)
+		eprintk_ctx("%d objects leaked\n", ctx->objcount);
+	kfree(ctx);
+
+	/* Restore the lock state expected by the caller. */
+	spin_lock(&cpt_context_lock);
+}
+
+/* Drop one reference; destroys the context when it was the last one.
+ * Caller must hold cpt_context_lock. */
+static void __cpt_context_put(cpt_context_t *ctx)
+{
+	if (!--ctx->refcount)
+		rst_context_release(ctx);
+}
+
+/* Locked wrapper around __cpt_context_put for callers that do not
+ * already hold cpt_context_lock. */
+static void cpt_context_put(cpt_context_t *ctx)
+{
+	spin_lock(&cpt_context_lock);
+	__cpt_context_put(ctx);
+	spin_unlock(&cpt_context_lock);
+}
+
+/* Allocate and initialize a fresh restore context, link it onto the
+ * global list, and attach an error-message page (best effort -- a NULL
+ * error_msg is tolerated elsewhere).  Returns NULL on allocation
+ * failure.  The new context starts with the refcount set up by
+ * rst_context_init. */
+cpt_context_t * rst_context_open(void)
+{
+	cpt_context_t *ctx;
+
+	if ((ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)) != NULL) {
+		rst_context_init(ctx);
+		spin_lock(&cpt_context_lock);
+		list_add_tail(&ctx->ctx_list, &cpt_context_list);
+		spin_unlock(&cpt_context_lock);
+		ctx->error_msg = (char*)__get_free_page(GFP_KERNEL);
+		if (ctx->error_msg != NULL)
+			ctx->error_msg[0] = 0;
+	}
+	return ctx;
+}
+
+/* Report a fatal restore error to userspace through the status fd, if
+ * one was registered: writes the VZ_ENVCREATE_ERROR status code and then
+ * drops our reference to the status file.  'err' itself is currently not
+ * transmitted, only the generic status. */
+void rst_report_error(int err, cpt_context_t *ctx)
+{
+	if (ctx->statusfile) {
+		mm_segment_t oldfs;
+		int status = 7 /* VZ_ENVCREATE_ERROR */;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		if (ctx->statusfile->f_op && ctx->statusfile->f_op->write)
+			ctx->statusfile->f_op->write(ctx->statusfile, (char*)&status, sizeof(status), &ctx->statusfile->f_pos);
+		set_fs(oldfs);
+		fput(ctx->statusfile);
+		ctx->statusfile = NULL;
+	}
+}
+
+
+/* Find a context by its user-visible id.  On success returns the context
+ * with its refcount bumped (caller must cpt_context_put it); NULL if no
+ * context has that id. */
+cpt_context_t * cpt_context_lookup(unsigned int ctxid)
+{
+	cpt_context_t *ctx;
+
+	spin_lock(&cpt_context_lock);
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		if (ctx->contextid == ctxid) {
+			ctx->refcount++;
+			spin_unlock(&cpt_context_lock);
+			return ctx;
+		}
+	}
+	spin_unlock(&cpt_context_lock);
+	return NULL;
+}
+
+/* ioctl entry point for /proc/rst -- the userspace control interface of
+ * the restore module.  Entered under the BKL (old-style .ioctl), which
+ * is dropped immediately and re-taken on exit.
+ *
+ * Command groups:
+ *  - CPT_TEST_CAPS: stateless capability probe.
+ *  - CPT_JOIN_CONTEXT / CPT_PUT_CONTEXT: (re)bind file->private_data to
+ *    an existing context / detach from the current one.
+ *  - everything else: operate on the file's context, creating one on
+ *    first use; a temporary reference is held for the duration.
+ *  - CPT_SET_*FD commands install userspace file descriptors (dump
+ *    image, pagein pipes, lock/status/error channels) into the context;
+ *    CPT_UNDUMP/CPT_RESUME/CPT_KILL drive the restore state machine
+ *    under ctx->main_sem.
+ *
+ * NOTE(review): 'arg' is unsigned long, so the "if (arg >= 0)" guards
+ * are always true; a bogus fd still fails safely inside fget(). */
+static int rst_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	cpt_context_t *ctx;
+	struct file *dfile = NULL;
+
+	unlock_kernel();
+
+	if (cmd == CPT_TEST_CAPS) {
+		err = test_cpu_caps_and_features();
+		goto out_lock;
+	}
+
+	if (cmd == CPT_JOIN_CONTEXT || cmd == CPT_PUT_CONTEXT) {
+		cpt_context_t *old_ctx;
+
+		ctx = NULL;
+		if (cmd == CPT_JOIN_CONTEXT) {
+			err = -ENOENT;
+			ctx = cpt_context_lookup(arg);
+			if (!ctx)
+				goto out_lock;
+		}
+
+		/* Swap the file's context reference; the lookup above
+		 * already took the reference now owned by the file. */
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		file->private_data = ctx;
+
+		if (old_ctx) {
+			if (cmd == CPT_PUT_CONTEXT && old_ctx->sticky) {
+				old_ctx->sticky = 0;
+				old_ctx->refcount--;
+			}
+			__cpt_context_put(old_ctx);
+		}
+		spin_unlock(&cpt_context_lock);
+		err = 0;
+		goto out_lock;
+	}
+
+	/* Take a temporary reference on the file's context for the
+	 * duration of this command. */
+	spin_lock(&cpt_context_lock);
+	ctx = (cpt_context_t*)file->private_data;
+	if (ctx)
+		ctx->refcount++;
+	spin_unlock(&cpt_context_lock);
+
+	if (!ctx) {
+		cpt_context_t *old_ctx;
+
+		err = -ENOMEM;
+		ctx = rst_context_open();
+		if (!ctx)
+			goto out_lock;
+
+		/* Racy first-use: another thread may have installed a
+		 * context meanwhile; if so, discard ours and use theirs. */
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		if (!old_ctx) {
+			ctx->refcount++;
+			file->private_data = ctx;
+		} else {
+			old_ctx->refcount++;
+		}
+		if (old_ctx) {
+			__cpt_context_put(ctx);
+			ctx = old_ctx;
+		}
+		spin_unlock(&cpt_context_lock);
+	}
+
+	if (cmd == CPT_GET_CONTEXT) {
+		unsigned int contextid = (unsigned int)arg;
+
+		err = -EINVAL;
+		if (ctx->contextid && ctx->contextid != contextid)
+			goto out_nosem;
+		if (!ctx->contextid) {
+			cpt_context_t *c1 = cpt_context_lookup(contextid);
+			if (c1) {
+				cpt_context_put(c1);
+				err = -EEXIST;
+				goto out_nosem;
+			}
+			ctx->contextid = contextid;
+		}
+		/* Make the context "sticky": it survives the fd being
+		 * closed, holding an extra reference. */
+		spin_lock(&cpt_context_lock);
+		if (!ctx->sticky) {
+			ctx->sticky = 1;
+			ctx->refcount++;
+		}
+		spin_unlock(&cpt_context_lock);
+		err = 0;
+		goto out_nosem;
+	}
+
+	down(&ctx->main_sem);
+
+	err = -EBUSY;
+	if (ctx->ctx_state < 0)
+		goto out;
+
+	err = 0;
+	switch (cmd) {
+	case CPT_SET_DUMPFD:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if (arg >= 0) {
+			err = -EBADF;
+			dfile = fget(arg);
+			if (dfile == NULL)
+				break;
+			/* The dump image must be readable. */
+			if (dfile->f_op == NULL ||
+			    dfile->f_op->read == NULL) {
+				fput(dfile);
+				break;
+			}
+			err = 0;
+		}
+		if (ctx->file)
+			fput(ctx->file);
+		ctx->file = dfile;
+		break;
+	case CPT_SET_PAGEINFDIN:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_in)
+			fput(ctx->pagein_file_in);
+		ctx->pagein_file_in = dfile;
+		break;
+	case CPT_SET_PAGEINFDOUT:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_out)
+			fput(ctx->pagein_file_out);
+		ctx->pagein_file_out = dfile;
+		break;
+	case CPT_ITER:
+		err = rst_iteration(ctx);
+		break;
+	case CPT_SET_LOCKFD:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->lockfile)
+			fput(ctx->lockfile);
+		ctx->lockfile = dfile;
+		break;
+	case CPT_SET_STATUSFD:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->statusfile)
+			fput(ctx->statusfile);
+		ctx->statusfile = dfile;
+		break;
+	case CPT_SET_ERRORFD:
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->errorfile)
+			fput(ctx->errorfile);
+		ctx->errorfile = dfile;
+		break;
+	case CPT_SET_VEID:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->ve_id = arg;
+		break;
+	case CPT_UNDUMP:
+		if (ctx->ctx_state > 0) {
+			err = -ENOENT;
+			break;
+		}
+		ctx->ctx_state = CPT_CTX_UNDUMPING;
+		err = vps_rst_undump(ctx);
+		if (err) {
+			rst_report_error(err, ctx);
+			if (rst_kill(ctx) == 0)
+				ctx->ctx_state = CPT_CTX_IDLE;
+		} else {
+			ctx->ctx_state = CPT_CTX_UNDUMPED;
+		}
+		break;
+	case CPT_PAGEIND:
+		err = rst_pageind(ctx);
+		break;
+	case CPT_RESUME:
+		if (!ctx->ctx_state) {
+			err = -ENOENT;
+			break;
+		}
+		err = rst_resume(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	case CPT_KILL:
+		if (!ctx->ctx_state) {
+			err = -ENOENT;
+			break;
+		}
+		err = rst_kill(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+out:
+	cpt_flush_error(ctx);
+	up(&ctx->main_sem);
+out_nosem:
+	cpt_context_put(ctx);
+out_lock:
+	lock_kernel();
+	return err;
+}
+
+/* open() on /proc/rst: just pin the module.  The context is created
+ * lazily on the first ioctl. */
+static int rst_open(struct inode * inode, struct file * file)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -EBUSY;
+
+	return 0;
+}
+
+/* release() on /proc/rst: drop the file's context reference (a sticky
+ * context keeps its extra reference and survives) and unpin the module. */
+static int rst_release(struct inode * inode, struct file * file)
+{
+	cpt_context_t *ctx;
+
+	spin_lock(&cpt_context_lock);
+	ctx = (cpt_context_t*)file->private_data;
+	file->private_data = NULL;
+	if (ctx)
+		__cpt_context_put(ctx);
+	spin_unlock(&cpt_context_lock);
+
+
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+/* File operations for /proc/rst; read/write/llseek are filled in at
+ * module init from the default procfs fops (see init_rst). */
+static struct file_operations rst_fops =
+{
+	.owner		= THIS_MODULE,
+	.ioctl		= rst_ioctl,
+	.open		= rst_open,
+	.release	= rst_release,
+};
+
+
+/* /proc/rst entry and the hook state for the x86-64 schedule_tail patch
+ * (schedule_tail_p saves the original call target). */
+static struct proc_dir_entry *proc_ent;
+extern void *schedule_tail_p;
+extern void schedule_tail_hook(void);
+
+/* Verbosity knob, exported as /proc/sys/debug/rst. */
+int debug_level = 1;
+
+static struct ctl_table_header *ctl_header;
+
+static ctl_table debug_table[] = {
+	{
+		.ctl_name	= 9476,
+		.procname	= "rst",
+		.data		= &debug_level,
+		.maxlen		= sizeof(debug_level),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+static ctl_table root_table[] = {
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+#ifdef CONFIG_X86_64
+
+/* Read the current target of the rel32 call/jump instruction at the
+ * start of ret_from_fork: the 32-bit displacement lives at byte 1 and is
+ * relative to the end of the 5-byte instruction. */
+static void *vzentry_forkret_get(void)
+{
+	unsigned char *p;
+
+	p = (unsigned char *)ret_from_fork;
+	return (void *)(*(u32 *)(p + 1) + p + 5);
+}
+
+/* Re-point that instruction at 'data' (used to splice in
+ * schedule_tail_hook).  BUGs if the target is out of rel32 range. */
+static void vzentry_forkret_set(void *data)
+{
+	unsigned char *p;
+	long offset;
+
+	p = (unsigned char *)ret_from_fork;
+	offset = (unsigned long)data - (unsigned long)(p + 5);
+	if ((long)(s32)offset != offset) {
+		printk("vzentry_forkret_set: too long hook offset\n");
+		BUG();
+	}
+	*(u32 *)(p + 1) = offset;
+}
+
+#endif
+
+/* Module init: register the debug sysctl, create /proc/rst, graft the
+ * default procfs read/write/llseek into rst_fops before installing them
+ * (so reads still go through proc_read while ioctl/open/release are
+ * ours), and on x86-64 patch ret_from_fork to call our
+ * schedule_tail_hook instead of the stock schedule_tail. */
+static int __init init_rst(void)
+{
+	int err;
+
+	err = -ENOMEM;
+	ctl_header = register_sysctl_table(root_table, 0);
+	if (!ctl_header)
+		goto err;
+
+	spin_lock_init(&cpt_context_lock);
+	INIT_LIST_HEAD(&cpt_context_list);
+
+	err = -EINVAL;
+	proc_ent = create_proc_entry_mod("rst", 0600, NULL, THIS_MODULE);
+	if (!proc_ent)
+		goto err_out;
+
+	rst_fops.read = proc_ent->proc_fops->read;
+	rst_fops.write = proc_ent->proc_fops->write;
+	rst_fops.llseek = proc_ent->proc_fops->llseek;
+	proc_ent->proc_fops = &rst_fops;
+
+	proc_ent->read_proc = proc_read;
+	proc_ent->data = NULL;
+	proc_ent->owner = THIS_MODULE;
+#ifdef CONFIG_X86_64
+	schedule_tail_p = vzentry_forkret_get();
+	vzentry_forkret_set(&schedule_tail_hook);
+#endif
+	return 0;
+
+err_out:
+	unregister_sysctl_table(ctl_header);
+err:
+	return err;
+}
+module_init(init_rst);
+
+/* Module exit: restore the original ret_from_fork target, remove the
+ * proc and sysctl entries, then destroy any contexts still on the list
+ * (only sticky ones can remain -- their stickiness is revoked so the
+ * final put releases them).  BUG_ON catches leaked references. */
+static void __exit exit_rst(void)
+{
+#ifdef CONFIG_X86_64
+	/* This is wrong, of course. But still the best what we can do. */
+	vzentry_forkret_set(schedule_tail_p);
+#endif
+
+	remove_proc_entry("rst", NULL);
+	unregister_sysctl_table(ctl_header);
+
+	spin_lock(&cpt_context_lock);
+	while (!list_empty(&cpt_context_list)) {
+		cpt_context_t *ctx;
+		ctx = list_entry(cpt_context_list.next, cpt_context_t, ctx_list);
+
+		if (!ctx->sticky)
+			ctx->refcount++;
+		ctx->sticky = 0;
+
+		BUG_ON(ctx->refcount != 1);
+
+		__cpt_context_put(ctx);
+	}
+	spin_unlock(&cpt_context_lock);
+}
+module_exit(exit_rst);
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_process.c linux-2.6.9-ve023stab054/kernel/cpt/rst_process.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_process.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_process.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,1298 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+#include <linux/kmem_cache.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/ptrace.h>
+#include <linux/tty.h>
+#include <asm/desc.h>
+#include <asm/unistd.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_misc.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_ubc.h"
+#include "cpt_process.h"
+#include "cpt_kernel.h"
+
+/* Arch-specific accessors for a task's saved syscall state, used when
+ * deciding how to restart an interrupted syscall after restore.  On
+ * x86-64, syscall_is() must also recognize 32-bit (ia32 compat) tasks,
+ * whose syscall numbers differ -- hence the __NR32_* table. */
+#ifdef CONFIG_X86_64
+
+#define _TIF_RESUME (1<<22)
+
+/* Saved user registers live at the top of the kernel stack. */
+#define task_pt_regs(t) (((struct pt_regs *) (THREAD_SIZE + (unsigned long) (t)->thread_info)) - 1)
+#define SYSCALL_NR(regs) ((regs)->orig_rax)
+#define SYSCALL_RETVAL(regs) ((regs)->rax)
+#define SYSCALL_PC(regs) ((regs)->rip)
+
+#define ESP(tsk) (tsk)->thread.rsp
+
+#define __NR32_restart_syscall	0
+#define __NR32_rt_sigtimedwait	177
+#define __NR32_pause		29
+#define __NR32_futex		240
+
+#define syscall_is(tsk,regs,name) ((!((tsk)->thread_info->flags&_TIF_IA32) && \
+				    SYSCALL_NR(regs) == __NR_##name) || \
+				   (((tsk)->thread_info->flags&_TIF_IA32) && \
+				    SYSCALL_NR(regs) == __NR32_##name))
+#else
+
+#define SYSCALL_NR(regs) ((regs)->orig_eax)
+#define SYSCALL_RETVAL(regs) ((regs)->eax)
+#define SYSCALL_PC(regs) ((regs)->eip)
+
+#define ESP(tsk) (tsk)->thread.esp
+
+#define syscall_is(tsk,regs,name) (SYSCALL_NR(regs) == __NR_##name)
+
+#endif
+
+/* Reconstruct a kernel siginfo_t from its checkpoint image.  The image
+ * reuses a few generic fields (cpt_pid/cpt_uid/cpt_sigval) whose meaning
+ * depends on the signal class encoded in cpt_code, mirroring how the
+ * kernel's siginfo union overlays them. */
+static void decode_siginfo(siginfo_t *info, struct cpt_siginfo_image *si)
+{
+	memset(info, 0, sizeof(*info));
+	switch(si->cpt_code & __SI_MASK) {
+	case __SI_TIMER:
+		info->si_tid = si->cpt_pid;
+		info->si_overrun = si->cpt_uid;
+		info->_sifields._timer._sigval.sival_ptr = cpt_ptr_import(si->cpt_sigval);
+		info->si_sys_private = si->cpt_utime;
+		break;
+	case __SI_POLL:
+		info->si_band = si->cpt_pid;
+		info->si_fd = si->cpt_uid;
+		break;
+	case __SI_FAULT:
+		info->si_addr = cpt_ptr_import(si->cpt_sigval);
+#ifdef __ARCH_SI_TRAPNO
+		info->si_trapno = si->cpt_pid;
+#endif
+		break;
+	case __SI_CHLD:
+		info->si_pid = si->cpt_pid;
+		info->si_uid = si->cpt_uid;
+		info->si_status = si->cpt_sigval;
+		info->si_stime = si->cpt_stime;
+		info->si_utime = si->cpt_utime;
+		break;
+	case __SI_KILL:
+	case __SI_RT:
+	case __SI_MESGQ:
+	default:
+		info->si_pid = si->cpt_pid;
+		info->si_uid = si->cpt_uid;
+		info->si_ptr = cpt_ptr_import(si->cpt_sigval);
+		break;
+	}
+	info->si_signo = si->cpt_signo;
+	info->si_errno = si->cpt_errno;
+	info->si_code = si->cpt_code;
+}
+
+/* Rebuild a task's pending-signal queue from a run of CPT_OBJ_SIGINFO
+ * records in the image buffer [start, end).  Each record becomes a
+ * sigqueue entry charged to the per-user sigpending counter and the
+ * beancounter of the current execution context.  Returns 0 or -ENOMEM
+ * (already-queued entries are not rolled back on failure).
+ * NOTE(review): assumes each record has cpt_next > 0 -- a corrupt image
+ * with cpt_next == 0 would loop forever; verify the image is validated
+ * upstream. */
+static int restore_sigqueue(task_t *tsk,
+			    struct sigpending *queue, unsigned long start,
+			    unsigned long end)
+{
+	while (start < end) {
+		struct cpt_siginfo_image *si = (struct cpt_siginfo_image *)start;
+		if (si->cpt_object == CPT_OBJ_SIGINFO) {
+			struct user_beancounter *ub;
+			struct sigqueue *q = NULL;
+			struct user_struct *up;
+			up = alloc_uid(si->cpt_user);
+			if (!up)
+				return -ENOMEM;
+			q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC);
+			if (!q) {
+				free_uid(up);
+				return -ENOMEM;
+			}
+			ub = get_beancounter(get_exec_ub());
+			if (ub_siginfo_charge(ub, 
+					      kmem_cache_memusage(sigqueue_cachep)) < 0) {
+				put_beancounter(ub);
+				kmem_cache_free(sigqueue_cachep, q);
+				free_uid(up);
+				return -ENOMEM;
+			}
+
+			INIT_LIST_HEAD(&q->list);
+			/* Preallocated elements (posix timers) are not
+			 * supported yet. It is safe to replace them with
+			 * a private one. */
+			q->flags = 0;
+			q->lock = &tsk->sighand->siglock;
+			q->user = up;
+			atomic_inc(&q->user->sigpending);
+			sig_ub(q) = ub;
+
+			decode_siginfo(&q->info, si);
+			list_add_tail(&q->list, &queue->list);
+		}
+		start += si->cpt_next;
+	}
+	return 0;
+}
+
+/* Second pass over all restored tasks: now that every task exists, fix
+ * up cross-task linkage that could not be resolved during creation --
+ * process group, session, and tty_old_pgrp -- by translating the virtual
+ * pids recorded in each task image to real pids and re-attaching the
+ * pid hashes under tasklist_lock.  Returns 0 or -EINVAL when a task or
+ * pid translation is missing. */
+int rst_process_linkage(cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		struct cpt_task_image *ti = obj->o_image;
+
+		if (tsk == NULL) {
+			eprintk_ctx("task %u(%s) is missing\n", ti->cpt_pid, ti->cpt_comm);
+			return -EINVAL;
+		}
+
+		if (virt_pgid(tsk) != ti->cpt_pgrp) {
+			int pid;
+
+			if ((pid = vpid_to_pid(ti->cpt_pgrp)) < 0) {
+				eprintk_ctx("illegal PGRP " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+
+			write_lock_irq(&tasklist_lock);
+			detach_pid(tsk, PIDTYPE_PGID);
+			tsk->signal->pgrp = pid;
+			set_virt_pgid(tsk, ti->cpt_pgrp);
+			/* Only the group leader sits on the PGID hash. */
+			if (thread_group_leader(tsk))
+				attach_pid(tsk, PIDTYPE_PGID, pid);
+			write_unlock_irq(&tasklist_lock);
+		}
+		if (virt_sid(tsk) != ti->cpt_session) {
+			int pid;
+
+			if ((pid = vpid_to_pid(ti->cpt_session)) < 0) {
+				eprintk_ctx("illegal SID " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+
+			write_lock_irq(&tasklist_lock);
+			detach_pid(tsk, PIDTYPE_SID);
+			tsk->signal->session = pid;
+			set_virt_sid(tsk, ti->cpt_session);
+			if (thread_group_leader(tsk))
+				attach_pid(tsk, PIDTYPE_SID, pid);
+			write_unlock_irq(&tasklist_lock);
+		}
+		if (ti->cpt_old_pgrp > 0 && tsk->signal->tty_old_pgrp == 0) {
+			int pid;
+
+			if ((pid = vpid_to_pid(ti->cpt_old_pgrp)) < 0) {
+				eprintk_ctx("illegal OLD_PGRP " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+
+			tsk->signal->tty_old_pgrp = pid;
+		}
+	}
+
+	return 0;
+}
+
+/* Restore the shared signal_struct of "current" from the checkpoint image.
+ *
+ * Reads the CPT_OBJ_SIGNAL_STRUCT record at ti->cpt_signal and re-creates:
+ *  - the process group id (allocating a fresh pid + vpid pair when the
+ *    group leader is orphaned, i.e. CPT_PGRP_ORPHAN),
+ *  - tty_old_pgrp, the session id (same orphan handling as pgrp),
+ *  - shared pending signals, controlling tty linkage, group-exit state,
+ *    and (on newer kernels) per-signal_struct accounting and rlimits.
+ *
+ * Must run in the context of the task being restored ("current").
+ * Returns 0 on success or a negative errno; on success *exiting is set to
+ * the saved group-exit flag.
+ */
+static int
+restore_one_signal_struct(struct cpt_task_image *ti, int *exiting, cpt_context_t *ctx)
+{
+	int err;
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9)
+	int i;	/* fix: used by the rlim restore loop below, was never declared */
+#endif
+	struct cpt_signal_image *si = cpt_get_buf(ctx);
+
+	current->signal->tty = NULL;
+
+	err = rst_get_object(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, si, ctx);
+	if (err) {
+		cpt_release_buf(ctx);
+		return err;
+	}
+
+	/* Re-establish the process group if it differs from the current one. */
+	if (virt_pgid(current) != si->cpt_pgrp) {
+		int err;
+		int pid = 0;
+
+		if (si->cpt_pgrp_type == CPT_PGRP_ORPHAN) {
+			/* Group leader no longer exists: allocate a fresh
+			 * kernel pid and bind it to the saved virtual pgid.
+			 * -EEXIST means another task already created the
+			 * mapping; fall through and look it up below. */
+			if (!is_virtual_pid(si->cpt_pgrp)) {
+				eprintk_ctx("external process group " CPT_FID, CPT_TID(current));
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			pid = alloc_pidmap();
+			if (pid < 0) {
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			if ((err = alloc_vpid(pid, si->cpt_pgrp)) < 0) {
+				free_pidmap(pid);
+				pid = 0;
+				if (err != -EEXIST) {
+					cpt_release_buf(ctx);
+					return err;
+				}
+			}
+		}
+		if (pid ||
+		    (pid = vpid_to_pid(si->cpt_pgrp)) > 0) {
+			write_lock_irq(&tasklist_lock);
+			detach_pid(current, PIDTYPE_PGID);
+			current->signal->pgrp = pid;
+			set_virt_pgid(current, si->cpt_pgrp);
+			if (thread_group_leader(current))
+				attach_pid(current, PIDTYPE_PGID, pid);
+			write_unlock_irq(&tasklist_lock);
+		}
+	}
+
+	/* tty_old_pgrp: the (int) cast treats CPT_NULL / negative markers as
+	 * "absent".  A CPT_PGRP_STRAY group no longer has live members, so a
+	 * throwaway pid is reserved and immediately released just to obtain
+	 * an unused value. */
+	current->signal->tty_old_pgrp = 0;
+	if ((int)si->cpt_old_pgrp > 0) {
+		if (si->cpt_old_pgrp_type == CPT_PGRP_STRAY) {
+			current->signal->tty_old_pgrp = alloc_pidmap();
+			if (current->signal->tty_old_pgrp < 0) {
+				eprintk_ctx("failed to allocate stray tty_old_pgrp\n");
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			free_pidmap(current->signal->tty_old_pgrp);
+		} else {
+			current->signal->tty_old_pgrp = vpid_to_pid(si->cpt_old_pgrp);
+			if (current->signal->tty_old_pgrp < 0) {
+				dprintk_ctx("forward old tty PGID\n");
+				current->signal->tty_old_pgrp = 0;
+			}
+		}
+	}
+
+	/* Re-establish the session; mirrors the pgrp logic above. */
+	if (virt_sid(current) != si->cpt_session) {
+		int err;
+		int pid = 0;
+
+		if (si->cpt_session_type == CPT_PGRP_ORPHAN) {
+			if (!is_virtual_pid(si->cpt_session)) {
+				eprintk_ctx("external process session " CPT_FID, CPT_TID(current));
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			pid = alloc_pidmap();
+			if (pid < 0) {
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			if ((err = alloc_vpid(pid, si->cpt_session)) < 0) {
+				free_pidmap(pid);
+				pid = 0;
+				if (err != -EEXIST) {
+					cpt_release_buf(ctx);
+					return err;
+				}
+			}
+		}
+		if (pid ||
+		    (pid = vpid_to_pid(si->cpt_session)) > 0) {
+			write_lock_irq(&tasklist_lock);
+			detach_pid(current, PIDTYPE_SID);
+			set_virt_sid(current, si->cpt_session);
+			current->signal->session = pid;
+			if (thread_group_leader(current))
+				attach_pid(current, PIDTYPE_SID, pid);
+			write_unlock_irq(&tasklist_lock);
+		}
+	}
+
+	cpt_sigset_import(&current->signal->shared_pending.signal, si->cpt_sigpending);
+	current->signal->leader = si->cpt_leader;
+	/* Reattach the controlling tty, but only if the tty is free or
+	 * already belongs to our (just restored) session. */
+	if (si->cpt_ctty != CPT_NULL) {
+		cpt_object_t *obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, si->cpt_ctty, ctx);
+		if (obj) {
+			struct tty_struct *tty = obj->o_obj;
+			if (tty->session == 0 || tty->session == current->signal->session) {
+				tty->session = current->signal->session;
+				current->signal->tty = tty;
+			} else {
+				wprintk_ctx("tty session mismatch\n");
+			}
+		}
+	}
+
+	if (si->cpt_curr_target)
+		current->signal->curr_target = find_task_by_pid_ve(si->cpt_curr_target);
+	*exiting = si->cpt_group_exit;
+	current->signal->group_exit_code = si->cpt_group_exit_code;
+	if (si->cpt_group_exit_task) {
+		current->signal->group_exit_task = find_task_by_pid_ve(si->cpt_group_exit_task);
+		if (current->signal->group_exit_task == NULL) {
+			eprintk_ctx("oops, group_exit_task=NULL, pid=%u\n", si->cpt_group_exit_task);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	current->signal->notify_count = si->cpt_notify_count;
+	current->signal->group_stop_count = si->cpt_group_stop_count;
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,8)
+	/* Per-signal_struct accounting moved here after 2.6.8. */
+	current->signal->stop_state = si->cpt_stop_state;
+
+	current->signal->utime = si->cpt_utime;
+	current->signal->stime = si->cpt_stime;
+	current->signal->cutime = si->cpt_cutime;
+	current->signal->cstime = si->cpt_cstime;
+	current->signal->nvcsw = si->cpt_nvcsw;
+	current->signal->nivcsw = si->cpt_nivcsw;
+	current->signal->cnvcsw = si->cpt_cnvcsw;
+	current->signal->cnivcsw = si->cpt_cnivcsw;
+	current->signal->min_flt = si->cpt_min_flt;
+	current->signal->maj_flt = si->cpt_maj_flt;
+	current->signal->cmin_flt = si->cpt_cmin_flt;
+	current->signal->cmaj_flt = si->cpt_cmaj_flt;
+#endif
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9)
+	/* Link-time-ish assertion: image rlimit array must be large enough. */
+	if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+		__asm__("undefined\n");
+
+	for (i=0; i<RLIM_NLIMITS; i++) {
+		current->signal->rlim[i].rlim_cur = si->cpt_rlim_cur[i];
+		current->signal->rlim[i].rlim_max = si->cpt_rlim_max[i];
+	}
+#endif
+
+	/* Trailing payload past the fixed header holds queued siginfo
+	 * records for the shared pending set. */
+	if (si->cpt_next > si->cpt_hdrlen) {
+		char *buf = kmalloc(si->cpt_next - si->cpt_hdrlen, GFP_KERNEL);
+		if (buf == NULL) {
+			cpt_release_buf(ctx);
+			return -ENOMEM;
+		}
+		err = ctx->pread(buf, si->cpt_next - si->cpt_hdrlen, ctx,
+				 ti->cpt_signal + si->cpt_hdrlen);
+		if (err) {
+			kfree(buf);
+			cpt_release_buf(ctx);
+			return err;
+		}
+		restore_sigqueue(current,
+				 &current->signal->shared_pending, (unsigned long)buf,
+				 (unsigned long)buf + si->cpt_next - si->cpt_hdrlen);
+		kfree(buf);
+	}
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+/* Restore the signal handler table (sighand_struct) of "current" from the
+ * image record at ti->cpt_sighand.  Every handler is first reset to SIG_DFL
+ * with an empty mask, then each CPT_OBJ_SIGHANDLER record in the payload is
+ * applied.  Returns 0 on success or a negative errno on a read error or a
+ * corrupted image.
+ */
+int restore_one_sighand_struct(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_sighand_image si;
+	int i;
+	loff_t pos, endpos;
+	
+	err = rst_get_object(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, &si, ctx);
+	if (err)
+		return err;
+
+	/* Start from a clean slate: default disposition for every signal. */
+	for (i=0; i<_NSIG; i++) {
+		current->sighand->action[i].sa.sa_handler = SIG_DFL;
+		current->sighand->action[i].sa.sa_restorer = 0;
+		current->sighand->action[i].sa.sa_flags = 0;
+		memset(&current->sighand->action[i].sa.sa_mask, 0, sizeof(sigset_t));
+	}
+
+	/* Walk the variable-length handler records that follow the header. */
+	pos = ti->cpt_sighand + si.cpt_hdrlen;
+	endpos = ti->cpt_sighand + si.cpt_next;
+	while (pos < endpos) {
+		struct cpt_sighandler_image shi;
+
+		err = rst_get_object(CPT_OBJ_SIGHANDLER, pos, &shi, ctx);
+		if (err)
+			return err;
+		/* The image is untrusted input: an out-of-range signal number
+		 * would index past action[] and corrupt memory. */
+		if (shi.cpt_signo >= _NSIG) {
+			eprintk_ctx("bad signo %u in sighand image\n",
+				    (unsigned)shi.cpt_signo);
+			return -EINVAL;
+		}
+		current->sighand->action[shi.cpt_signo].sa.sa_handler = (void*)(unsigned long)shi.cpt_handler;
+		current->sighand->action[shi.cpt_signo].sa.sa_restorer = (void*)(unsigned long)shi.cpt_restorer;
+		current->sighand->action[shi.cpt_signo].sa.sa_flags = shi.cpt_flags;
+		cpt_sigset_import(&current->sighand->action[shi.cpt_signo].sa.sa_mask, shi.cpt_mask);
+		pos += shi.cpt_next;
+	}
+
+	return 0;
+}
+
+
+/* Compute the clone flags describing how the task being restored shares
+ * signal state with already-restored tasks: CLONE_THREAD when its
+ * signal_struct was already materialized for another thread of the group,
+ * CLONE_SIGHAND when its handler table is shared (or absent from the image).
+ */
+__u32 rst_signal_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	__u32 flags = 0;
+
+	if (lookup_cpt_obj_bypos(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, ctx) != NULL)
+		flags |= CLONE_THREAD;
+
+	if (ti->cpt_sighand == CPT_NULL)
+		flags |= CLONE_SIGHAND;
+	else if (lookup_cpt_obj_bypos(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, ctx) != NULL)
+		flags |= CLONE_SIGHAND;
+
+	return flags;
+}
+
+/* Finish restoring signal state for the task image ti, running as "current".
+ * For each of sighand and signal: if the image position is already in the
+ * object table, this task must share the structure with the task that
+ * restored it (mismatch => -EINVAL); otherwise register current's structure
+ * under that position and restore its contents from the image.
+ * Returns 0 on success or a negative errno.
+ */
+int
+rst_signal_complete(struct cpt_task_image *ti, int * exiting, cpt_context_t *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+
+	if (ti->cpt_signal == CPT_NULL || ti->cpt_sighand == CPT_NULL) {
+		return -EINVAL;
+	}
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, ctx);
+	if (obj) {
+		/* Shared sighand: clone must already have given us the same one. */
+		struct sighand_struct *sig = current->sighand;
+		if (obj->o_obj != sig) {
+			return -EINVAL;
+		}
+	} else {
+		/* First task to reference this sighand: register and restore it. */
+		obj = cpt_object_add(CPT_OBJ_SIGHAND_STRUCT, current->sighand, ctx);
+		if (obj == NULL)
+			return -ENOMEM;
+		cpt_obj_setpos(obj, ti->cpt_sighand, ctx);
+		err = restore_one_sighand_struct(ti, ctx);
+		if (err)
+			return err;
+	}
+
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, ctx);
+	if (obj) {
+		struct signal_struct *sig = current->signal;
+		if (obj->o_obj != sig) {
+			return -EINVAL;
+		}
+		/* signal_struct already restored by a sibling thread: only
+		 * propagate the (already restored) pgid/sid into this
+		 * thread's virtual pid view. */
+		if (current->signal) {
+			set_virt_pgid(current, pid_type_to_vpid(PIDTYPE_PGID, current->signal->pgrp));
+			set_virt_sid(current, pid_type_to_vpid(PIDTYPE_SID, current->signal->session));
+		}
+	} else {
+		obj = cpt_object_add(CPT_OBJ_SIGNAL_STRUCT, current->signal, ctx);
+		if (obj == NULL)
+			return -ENOMEM;
+		cpt_obj_setpos(obj, ti->cpt_signal, ctx);
+		err = restore_one_signal_struct(ti, exiting, ctx);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Translate a checkpoint segment id (CPT_SEG_*) back into a hardware
+ * segment selector.  NOTE: the ordering of the range checks is semantic --
+ * TLS ids are tested before the open-ended LDT range, so the CPT_SEG_*
+ * constants are presumably ordered ZERO < TLS1..TLS3 < ... < LDT (verify
+ * against the cpt image header).  Unknown ids degrade to a null selector
+ * with a warning rather than failing the restore.
+ */
+static u32 decode_segment(u32 segid)
+{
+	if (segid == CPT_SEG_ZERO)
+		return 0;
+
+	/* TLS descriptors: GDT slot shifted into selector form, RPL 3 */
+	if (segid <= CPT_SEG_TLS3)
+		return ((GDT_ENTRY_TLS_MIN + segid-CPT_SEG_TLS1)<<3) + 3;
+
+	/* LDT descriptor, it is just an index to LDT array
+	 * (selector with TI=1 and RPL 3, hence the low bits 7) */
+	if (segid >= CPT_SEG_LDT)
+		return ((segid - CPT_SEG_LDT) << 3) | 7;
+
+	/* Check for one of standard descriptors */
+#ifdef CONFIG_X86_64
+	if (segid == CPT_SEG_USER32_DS)
+		return __USER32_DS;
+	if (segid == CPT_SEG_USER32_CS)
+		return __USER32_CS;
+	if (segid == CPT_SEG_USER64_DS)
+		return __USER_DS;
+	if (segid == CPT_SEG_USER64_CS)
+		return __USER_CS;
+#else
+	if (segid == CPT_SEG_USER32_DS)
+		return __USER_DS;
+	if (segid == CPT_SEG_USER32_CS)
+		return __USER_CS;
+#endif
+	wprintk("Invalid segment reg %d\n", segid);
+	return 0;
+}
+
+/* "Restore child tid" hook, run on the restored task's first return to
+ * userspace.  child_tids points at the two-word argument block pushed by
+ * add_hook(): [0] = clear_child_tid, [1] = set_child_tid.  Drops the module
+ * reference taken when the hook was installed and returns the stack cursor
+ * advanced past the consumed arguments.
+ */
+asmlinkage unsigned long rct(unsigned long *child_tids)
+{
+	dprintk("rct: " CPT_FID "\n", CPT_TID(current));
+	current->clear_child_tid = (void*)child_tids[0];
+	current->set_child_tid = (void*)child_tids[1];
+	module_put(THIS_MODULE);
+	return (unsigned long)(child_tids+2);
+}
+
+/* "Restore last siginfo" hook: runs in the context of a restored task that
+ * was checkpointed inside a ptrace/signal-delivery stop.  Based on the
+ * saved pn_state it re-creates the effect of the interrupted stop
+ * (ptrace_notify(), do_syscall_trace(), fork/vfork return, exit, or a
+ * plain dequeued signal) and then drops, requeues or leaves the pending
+ * signal as appropriate.  Returns the stack cursor advanced past the
+ * siginfo_t argument block pushed by add_hook().
+ */
+asmlinkage unsigned long rlsi(void)
+{
+	int signr;
+	siginfo_t *info = current->last_siginfo;
+	struct pt_regs *regs = task_pt_regs(current);
+	struct k_sigaction *ka;
+	int ptrace_id;
+
+	dprintk("rlsi: " CPT_FID "\n", CPT_TID(current));
+
+	spin_lock_irq(&current->sighand->siglock);
+	current->last_siginfo = NULL;
+	recalc_sigpending();
+
+	ptrace_id = current->pn_state;
+	clear_pn_state(current);
+
+	switch (ptrace_id) {
+	case PN_STOP_TF:
+	case PN_STOP_TF_RT:
+		/* frame_*signal: nothing to redo, just log and return */
+		dprintk("SIGTRAP %u/%u(%s) %u/%u %u %ld %lu %lu\n",
+		       virt_pid(current), current->pid, current->comm,
+		       info->si_signo, info->si_code,
+		       current->exit_code, SYSCALL_NR(regs),
+		       current->ptrace, current->ptrace_message);
+		goto out;
+	case PN_STOP_ENTRY:
+	case PN_STOP_LEAVE:
+		/* do_syscall_trace: deliver any signal the tracer injected,
+		 * then rewind the syscall so it re-executes: put the syscall
+		 * number back in the retval slot and back the PC up over the
+		 * 2-byte syscall instruction. */
+		spin_unlock_irq(&current->sighand->siglock);
+		dprintk("ptrace do_syscall_trace: %d %d\n", ptrace_id, current->exit_code);
+		if (current->exit_code) {
+			send_sig(current->exit_code, current, 1);
+			current->exit_code = 0;
+		}
+		if (ptrace_id == PN_STOP_ENTRY && SYSCALL_RETVAL(regs) == -ENOSYS) {
+			SYSCALL_RETVAL(regs) = SYSCALL_NR(regs);
+			SYSCALL_PC(regs) -= 2;
+		} else if (syscall_is(current, regs, rt_sigtimedwait)) {
+			if (SYSCALL_RETVAL(regs) == -EAGAIN || SYSCALL_RETVAL(regs) == -EINTR) {
+				SYSCALL_RETVAL(regs) = SYSCALL_NR(regs);
+				SYSCALL_PC(regs) -= 2;
+			}
+		}
+		goto out_nolock;
+	case PN_STOP_FORK:
+		/* fork: re-publish the child pid as the syscall result */
+		SYSCALL_RETVAL(regs) = current->ptrace_message;
+		dprintk("ptrace fork returns pid %ld\n", SYSCALL_RETVAL(regs));
+		goto out;
+	case PN_STOP_VFORK:
+		/* after vfork */
+		SYSCALL_RETVAL(regs) = current->ptrace_message;
+		dprintk("ptrace after vfork returns pid %ld\n", SYSCALL_RETVAL(regs));
+		goto out;
+	case PN_STOP_SIGNAL:
+		/* normal case : dequeue signal */
+		break;
+	case PN_STOP_EXIT:
+		/* task was caught in the PT_TRACE_EXIT stop: finish dying */
+		dprintk("ptrace exit caught\n");
+		current->ptrace &= ~PT_TRACE_EXIT;
+		spin_unlock_irq(&current->sighand->siglock);
+		module_put(THIS_MODULE);
+		complete_and_exit(NULL, current->ptrace_message);
+		BUG();
+	case PN_STOP_EXEC:
+		eprintk("ptrace after exec caught: must not happen\n");
+		BUG();
+	default:
+		eprintk("ptrace with unknown identity %d\n", ptrace_id);
+		BUG();
+	}
+
+	/* PN_STOP_SIGNAL path: exit_code carries the (possibly rewritten)
+	 * signal number chosen by the tracer; 0 means it was cancelled. */
+	signr = current->exit_code;
+	if (signr == 0) {
+		dprintk("rlsi: canceled signal %d\n", info->si_signo);
+		goto out;
+	}
+	current->exit_code = 0;
+
+	/* Tracer substituted a different signal: rebuild the siginfo as if
+	 * it were sent by the (virtual) parent. */
+	if (signr != info->si_signo) {
+		info->si_signo = signr;
+		info->si_errno = 0;
+		info->si_code = SI_USER;
+		info->si_pid = virt_pid(current->parent);
+		info->si_uid = current->parent->uid;
+	}
+
+	/* If the (new) signal is now blocked, requeue it.  */
+	if (sigismember(&current->blocked, signr)) {
+		dprintk("going to requeue signal %d\n", signr);
+		goto out_resend_sig;
+	}
+
+	ka = &current->sighand->action[signr-1];
+	if (ka->sa.sa_handler == SIG_IGN) {
+		dprintk("going to resend signal %d (ignored)\n", signr);
+		goto out;
+	}
+	if (ka->sa.sa_handler != SIG_DFL) {
+		dprintk("going to resend signal %d (not SIG_DFL)\n", signr);
+		goto out_resend_sig;
+	}
+	/* Default-action signals that would be ignored anyway (or target
+	 * init) are simply dropped. */
+	if (signr == SIGCONT ||
+	    signr == SIGCHLD ||
+	    signr == SIGWINCH ||
+	    signr == SIGURG ||
+	    current->pid == 1)
+		goto out;
+
+	/* All the rest, which we cannot handle are requeued. */
+	dprintk("going to resend signal %d (sigh)\n", signr);
+out_resend_sig:
+	spin_unlock_irq(&current->sighand->siglock);
+	send_sig_info(signr, info, current);
+	module_put(THIS_MODULE);
+	return (unsigned long)(info+1);
+
+out:
+	spin_unlock_irq(&current->sighand->siglock);
+out_nolock:
+	module_put(THIS_MODULE);
+	return (unsigned long)(info+1);
+}
+
+/* Hook run for a task that was checkpointed inside finish_stop() (i.e. in
+ * TASK_STOPPED waiting for SIGCONT).  Clears the recorded stop state and
+ * exit_code so the resumed do_signal() path proceeds normally, then drops
+ * the module reference taken by add_hook().
+ */
+static void ret_finish_stop(void)
+{
+	/* ...
+	 * do_signal() ->
+	 *   get_signal_to_deliver() ->
+	 *     do_signal_stop() ->
+	 *       finish_stop()
+	 *
+	 * Normally after SIGCONT it will dequeue the next signal. If no signal
+	 * is found, do_signal restarts syscall unconditionally.
+	 * Otherwise signal handler is pushed on user stack.
+	 */
+
+	dprintk("rfs: " CPT_FID "\n", CPT_TID(current));
+
+	clear_stop_state(current);
+	current->exit_code = 0;
+
+	module_put(THIS_MODULE);
+}
+
+/* Hook run for a task checkpointed inside *sigsuspend().  Re-enters the
+ * sigsuspend wait loop: sleep interruptibly until do_signal() actually
+ * delivers a signal against the saved oldmask, then clear the sigsuspend
+ * marker and drop the module reference.
+ */
+static void ret_finish_sigsuspend(void)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+
+	/* *sigsuspend() -> do_signal() -> ...
+	 *
+	 * We can be stopped in three places:
+	 * 1. do_signal() -> refrigerator()
+	 *    { sigsuspend flag, oldmask is saved }
+	 * 2. do_signal() -> get_signal_to_deliver() -> ptrace stop
+	 *    { sigsuspend flag, oldmask is saved; PN_STOP_SIGNAL }
+	 * 3. do_signal() -> get_signal_to_deliver() -> do_signal_stop() ->
+	 *	finish_stop()
+	 *    { sigsuspend flag, oldmask is saved; stop_state }
+	 *	
+	 * When we reached this hook, hook for pn (rlsi) and stop (rfs)
+	 * have already been executed. Now we are going to proceed with
+	 * *sigsuspend().
+	 */
+	dprintk("rfss: " CPT_FID "\n", CPT_TID(current));
+
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		/* do_signal() returns nonzero once a signal was delivered
+		 * (or is about to be handled) -> sigsuspend is over */
+		if (do_signal(regs, &current->saved_sigset)) {
+			clear_sigsuspend_state(current);
+			break;
+		}
+	}
+
+	module_put(THIS_MODULE);
+}
+
+/* Hook run for a task that was checkpointed mid-syscall with a restartable
+ * return value.  Re-arms the interrupted syscall: for pause() it re-enters
+ * the sleep; for rt_sigtimedwait()/futex() and the generic -ERESTART*
+ * family it rewinds the PC over the 2-byte syscall instruction and puts
+ * the syscall number back in the retval slot so the syscall re-executes
+ * on return to userspace.
+ */
+static void ret_restart_sys(void)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+
+	/* This hook is supposed to be executed, when we have
+	 * to complete some interrupted syscall.
+	 */
+	dprintk("rrs: " CPT_FID "\n", CPT_TID(current));
+
+	if (syscall_is(current,regs,pause)) {
+		if (SYSCALL_RETVAL(regs) == -ERESTARTNOHAND) {
+			/* pause() has no state to rewind: just sleep again */
+			current->state = TASK_INTERRUPTIBLE;
+			schedule();
+		}
+	} else if (syscall_is(current,regs,rt_sigtimedwait)) {
+		if (SYSCALL_RETVAL(regs) == -EAGAIN || SYSCALL_RETVAL(regs) == -EINTR) {
+			SYSCALL_RETVAL(regs) = SYSCALL_NR(regs);
+			SYSCALL_PC(regs) -= 2;
+		}
+	} else if (syscall_is(current,regs,futex)) {
+		if (SYSCALL_RETVAL(regs) == -EINTR) {
+			SYSCALL_RETVAL(regs) = SYSCALL_NR(regs);
+			SYSCALL_PC(regs) -= 2;
+		}
+	}
+
+	/* Generic restart, only when no signal is pending (a pending signal
+	 * means the normal signal-delivery path will handle the restart). */
+	if (!signal_pending(current)) {
+		if (SYSCALL_RETVAL(regs) == -ERESTARTSYS ||
+		    SYSCALL_RETVAL(regs) == -ERESTARTNOINTR ||
+		    SYSCALL_RETVAL(regs) == -ERESTARTNOHAND) {
+			SYSCALL_RETVAL(regs) = SYSCALL_NR(regs);
+			SYSCALL_PC(regs) -= 2;
+		} else if (SYSCALL_RETVAL(regs) == -ERESTART_RESTARTBLOCK) {
+			/* restart via sys_restart_syscall; 32-bit tasks on a
+			 * 64-bit kernel need the compat syscall number */
+			SYSCALL_RETVAL(regs) = __NR_restart_syscall;
+#ifdef CONFIG_X86_64
+			if (current->thread_info->flags&_TIF_IA32)
+				SYSCALL_RETVAL(regs) = __NR32_restart_syscall;
+#endif
+			SYSCALL_PC(regs) -= 2;
+		}
+	}
+
+	module_put(THIS_MODULE);
+}
+
+extern asmlinkage void ret_last_siginfo(void);
+extern asmlinkage void ret_child_tid(void);
+extern asmlinkage void ret_from_rst(void);
+extern asmlinkage void pre_ret_from_fork(void);
+
+#ifndef CONFIG_X86_64
+
+/* tsk->thread.eip points to pre_ret_from_fork
+ * Stack layout:
+ * [eip of the last hook]
+ * [args of the last hook]
+ * [eip of previous hook]
+ * [args of previous hook]
+ * ...
+ * [eip of the first hook]
+ * [args of the first hook]
+ * [ret_from_rst]
+ */
+
+/* i386: push a restart hook onto the restored task's kernel stack.  The
+ * previous thread.eip (next hook, or ret_from_rst for the first call) is
+ * saved below argsize bytes of argument space, and the hook becomes the new
+ * thread.eip.  Takes one module reference per hook; each hook releases it
+ * when it runs.  Returns a pointer to the argument area so the caller can
+ * fill in the hook's arguments.
+ */
+static void * add_hook(task_t *tsk, void (*hook)(void), int argsize, int *hooks)
+{
+	ESP(tsk) -= sizeof(unsigned long);
+	*(unsigned long*)ESP(tsk) = tsk->thread.eip;
+	ESP(tsk) -= argsize;
+	tsk->thread.eip = (unsigned long)hook;
+	if (!try_module_get(THIS_MODULE)) BUG();
+	(*hooks)++;
+	return (void*)ESP(tsk);
+}
+
+/* i386: load the saved user-mode register set from the image into the
+ * task's pt_regs and thread state.  The bulk copy from &b->cpt_ebx assumes
+ * the image register layout starting at cpt_ebx matches struct pt_regs
+ * field-for-field (presumably guaranteed by the image format -- verify
+ * against cpt_image.h); segment selectors are then re-decoded to account
+ * for GDT/LDT differences.  Returns 0 or -EINVAL on an unexpected object.
+ */
+static int restore_registers(task_t *tsk, struct pt_regs *regs,
+			     struct cpt_task_image *ti, struct cpt_x86_regs *b)
+{
+	if (b->cpt_object != CPT_OBJ_X86_REGS)
+		return -EINVAL;
+
+	/* Kernel stack: pt_regs sits at the top; eip resumes at ret_from_rst */
+	tsk->thread.esp = (unsigned long) regs;
+	tsk->thread.esp0 = (unsigned long) (regs+1);
+	tsk->thread.eip = (unsigned long) ret_from_rst;
+
+	tsk->thread.fs = decode_segment(b->cpt_fs);
+	tsk->thread.gs = decode_segment(b->cpt_gs);
+	tsk->thread.debugreg[0] = b->cpt_debugreg[0];
+	tsk->thread.debugreg[1] = b->cpt_debugreg[1];
+	tsk->thread.debugreg[2] = b->cpt_debugreg[2];
+	tsk->thread.debugreg[3] = b->cpt_debugreg[3];
+	tsk->thread.debugreg[4] = b->cpt_debugreg[4];
+	tsk->thread.debugreg[5] = b->cpt_debugreg[5];
+	tsk->thread.debugreg[6] = b->cpt_debugreg[6];
+	tsk->thread.debugreg[7] = b->cpt_debugreg[7];
+
+	memcpy(regs, &b->cpt_ebx, sizeof(struct pt_regs));
+
+	/* Selectors cannot be copied verbatim: re-map via decode_segment() */
+	regs->xcs = decode_segment(b->cpt_xcs);
+	regs->xss = decode_segment(b->cpt_xss);
+	regs->xds = decode_segment(b->cpt_xds);
+	regs->xes = decode_segment(b->cpt_xes);
+
+	return 0;
+}
+
+#else
+
+/* Stack layout:
+ *
+ * [eip of the last hook]
+ * [args of the last hook]
+ * ...
+ * [eip of the first hook]
+ * [args of the first hook]
+ * [ret_from_fork+5]
+ */
+
+/* x86_64: push a restart hook onto the restored task's kernel stack.
+ * The very first hook also pushes ret_from_fork2 as the final return
+ * address and sets _TIF_RESUME so the hook chain is entered on resume.
+ * Each subsequent entry is [hook address][argsize bytes of args].  Takes
+ * one module reference per hook (released by the hook itself).  Returns a
+ * pointer to the argument area for the caller to fill in.
+ */
+static void * add_hook(task_t *tsk, void (*hook)(void), int argsize, int *hooks)
+{
+	if (!*hooks) {
+		extern void ret_from_fork2(void);
+		ESP(tsk) -= sizeof(unsigned long);
+		*(unsigned long*)ESP(tsk) = (unsigned long)ret_from_fork2;
+		tsk->thread_info->flags |= _TIF_RESUME;
+	}
+	ESP(tsk) -= argsize + sizeof(unsigned long);
+	*(unsigned long*)ESP(tsk) = (unsigned long)hook;
+	if (!try_module_get(THIS_MODULE)) BUG();
+	(*hooks)++;
+	return (void*)(ESP(tsk) + sizeof(unsigned long));
+}
+
+/* Translate a 32-bit register image into a 64-bit pt_regs (restoring a
+ * 32-bit task on an x86_64 kernel).  Unmapped 64-bit registers are zeroed.
+ * eax and orig_eax are sign-extended via (s32) so negative syscall return
+ * values (-ERESTART* etc.) keep their meaning in 64-bit registers; the
+ * other registers are zero-extended.
+ */
+static void xlate_ptregs_32_to_64(struct pt_regs *d, struct cpt_x86_regs *s)
+{
+	memset(d, 0, sizeof(struct pt_regs));
+	d->rbp = s->cpt_ebp;
+	d->rbx = s->cpt_ebx;
+	d->rax = (s32)s->cpt_eax;
+	d->rcx = s->cpt_ecx;
+	d->rdx = s->cpt_edx;
+	d->rsi = s->cpt_esi;
+	d->rdi = s->cpt_edi;
+	d->orig_rax = (s32)s->cpt_orig_eax;
+	d->rip = s->cpt_eip;
+	d->cs = s->cpt_xcs;
+	d->eflags = s->cpt_eflags;
+	d->rsp = s->cpt_esp;
+	d->ss = s->cpt_xss;
+}
+
+/* x86_64: load the saved user-mode register set into the task's pt_regs
+ * and thread state.  Accepts either a native 64-bit image
+ * (CPT_OBJ_X86_64_REGS, bulk-copied from cpt_r15 -- layout presumably
+ * matches struct pt_regs, verify against cpt_image.h) or a 32-bit image
+ * (CPT_OBJ_X86_REGS, translated via xlate_ptregs_32_to_64()).  Segment
+ * selectors are re-decoded in both cases.  Returns 0 or -EINVAL for an
+ * unknown object type.
+ */
+static int restore_registers(task_t *tsk, struct pt_regs *regs,
+			     struct cpt_task_image *ti, struct cpt_obj_bits *hdr)
+{
+	if (hdr->cpt_object == CPT_OBJ_X86_64_REGS) {
+		struct cpt_x86_64_regs *b = (void*)hdr;
+
+		tsk->thread.rsp = (unsigned long) regs;
+		tsk->thread.rsp0 = (unsigned long) (regs+1);
+
+		/* fs/gs hold the 64-bit base addresses; the selector
+		 * indexes are decoded separately */
+		tsk->thread.fs = b->cpt_fsbase;
+		tsk->thread.gs = b->cpt_gsbase;
+		tsk->thread.fsindex = decode_segment(b->cpt_fsindex);
+		tsk->thread.gsindex = decode_segment(b->cpt_gsindex);
+		tsk->thread.ds = decode_segment(b->cpt_ds);
+		tsk->thread.es = decode_segment(b->cpt_es);
+		tsk->thread.debugreg0 = b->cpt_debugreg[0];
+		tsk->thread.debugreg1 = b->cpt_debugreg[1];
+		tsk->thread.debugreg2 = b->cpt_debugreg[2];
+		tsk->thread.debugreg3 = b->cpt_debugreg[3];
+		tsk->thread.debugreg6 = b->cpt_debugreg[6];
+		tsk->thread.debugreg7 = b->cpt_debugreg[7];
+
+		memcpy(regs, &b->cpt_r15, sizeof(struct pt_regs));
+
+		tsk->thread.userrsp = regs->rsp;
+		regs->cs = decode_segment(b->cpt_cs);
+		regs->ss = decode_segment(b->cpt_ss);
+	} else if (hdr->cpt_object == CPT_OBJ_X86_REGS) {
+		/* 32-bit task: translate the ia32 register image */
+		struct cpt_x86_regs *b = (void*)hdr;
+
+		tsk->thread.rsp = (unsigned long) regs;
+		tsk->thread.rsp0 = (unsigned long) (regs+1);
+
+		tsk->thread.fs = 0;
+		tsk->thread.gs = 0;
+		tsk->thread.fsindex = decode_segment(b->cpt_fs);
+		tsk->thread.gsindex = decode_segment(b->cpt_gs);
+		tsk->thread.debugreg0 = b->cpt_debugreg[0];
+		tsk->thread.debugreg1 = b->cpt_debugreg[1];
+		tsk->thread.debugreg2 = b->cpt_debugreg[2];
+		tsk->thread.debugreg3 = b->cpt_debugreg[3];
+		tsk->thread.debugreg6 = b->cpt_debugreg[6];
+		tsk->thread.debugreg7 = b->cpt_debugreg[7];
+
+		xlate_ptregs_32_to_64(regs, b);
+
+		tsk->thread.userrsp = regs->rsp;
+		regs->cs = decode_segment(b->cpt_xcs);
+		regs->ss = decode_segment(b->cpt_xss);
+		tsk->thread.ds = decode_segment(b->cpt_xds);
+		tsk->thread.es = decode_segment(b->cpt_xes);
+	} else {
+		return -EINVAL;
+	}
+	return 0;
+}
+
+#endif
+
+/* Final per-task restore pass: for every task created during restart,
+ * load its state from the checkpoint image -- beancounter, credentials,
+ * signals, times, rlimits, TLS, FPU, registers, parent linkage -- then
+ * install the chain of resume hooks (syscall restart, sigsuspend, last
+ * siginfo, stop, child tid) that replays the context the task was frozen
+ * in, and finally re-arm its interval timer.  Returns 0 on success or a
+ * negative errno; tasks must be inactive when this runs.
+ */
+int rst_restore_process(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		struct cpt_task_image *ti = obj->o_image;
+		struct pt_regs * regs;
+		struct cpt_object_hdr *b;
+		struct cpt_siginfo_image *lsi = NULL;
+		struct group_info *gids, *ogids;
+		struct task_beancounter *tbc;
+		struct user_beancounter *old_bc;
+		struct user_beancounter *new_bc;
+		int err;
+		int hooks = 0;
+		int i;
+
+		if (tsk == NULL) {
+			eprintk_ctx("oops, task %d/%s is missing\n", ti->cpt_pid, ti->cpt_comm);
+			return -EFAULT;
+		}
+
+		/* The task must be off-CPU before we rewrite its state */
+		wait_task_inactive(tsk);
+
+		/* Resolve the execution beancounter; a VIRTNOTIFY_CHANGE
+		 * verdict swaps it in and leaves the old one in new_bc so
+		 * the single put_beancounter() below releases the right ref */
+		tbc = task_bc(tsk);
+		new_bc = rst_lookup_ubc(ti->cpt_exec_ub, ctx);
+		err = virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_RSTTSK, new_bc);
+		if (err & NOTIFY_FAIL) {
+			put_beancounter(new_bc);
+			return -ECHRNG;
+		}
+		old_bc = tbc->exec_ub;
+		if ((err & VIRTNOTIFY_CHANGE) && old_bc != new_bc) {
+			dprintk(" *** replacing ub %p by %p for %p (%d %s)\n",
+				old_bc, new_bc, tsk, tsk->pid, tsk->comm);
+			tbc->exec_ub = new_bc;
+			new_bc = old_bc;
+		}
+		put_beancounter(new_bc);
+
+		regs = task_pt_regs(tsk);
+
+		if (!tsk->exit_state) {
+			tsk->lock_depth = -1;
+#ifdef CONFIG_PREEMPT
+			/* drop the preempt count the freezer path left behind */
+			tsk->thread_info->preempt_count--;
+#endif
+		}
+
+		if (tsk->static_prio != ti->cpt_static_prio)
+			set_user_nice(tsk, PRIO_TO_NICE((s32)ti->cpt_static_prio));
+
+		/* Per-task signal masks and private pending set */
+		cpt_sigset_import(&tsk->blocked, ti->cpt_sigblocked);
+		cpt_sigset_import(&tsk->real_blocked, ti->cpt_sigrblocked);
+		cpt_sigset_import(&tsk->saved_sigset, ti->cpt_sigsuspend_blocked);
+		cpt_sigset_import(&tsk->pending.signal, ti->cpt_sigpending);
+
+		/* Credentials and capabilities */
+		tsk->uid = ti->cpt_uid;
+		tsk->euid = ti->cpt_euid;
+		tsk->suid = ti->cpt_suid;
+		tsk->fsuid = ti->cpt_fsuid;
+		tsk->gid = ti->cpt_gid;
+		tsk->egid = ti->cpt_egid;
+		tsk->sgid = ti->cpt_sgid;
+		tsk->fsgid = ti->cpt_fsgid;
+		memcpy(&tsk->cap_effective, &ti->cpt_ecap, sizeof(tsk->cap_effective));
+		memcpy(&tsk->cap_inheritable, &ti->cpt_icap, sizeof(tsk->cap_inheritable));
+		memcpy(&tsk->cap_permitted, &ti->cpt_pcap, sizeof(tsk->cap_permitted));
+		tsk->keep_capabilities = (ti->cpt_keepcap != 0);
+		tsk->did_exec = (ti->cpt_did_exec != 0);
+		/* Supplementary groups: copies 32 slots -- presumably
+		 * NGROUPS_SMALL; TODO confirm against the image format */
+		gids = groups_alloc(ti->cpt_ngids);
+		ogids = tsk->group_info;
+		if (gids) {
+			int i;
+			for (i=0; i<32; i++)
+				gids->small_block[i] = ti->cpt_gids[i];
+			tsk->group_info = gids;
+		}
+		if (ogids)
+			put_group_info(ogids);
+		tsk->utime = ti->cpt_utime;
+		tsk->stime = ti->cpt_stime;
+		/* start_time: version-8 images store ticks, later ones a
+		 * timespec; rebase onto the destination VE's start time */
+		if (ctx->image_version == CPT_VERSION_8)
+			tsk->start_time = _ns_to_timespec((s64)ti->cpt_starttime*TICK_NSEC);
+		else
+			cpt_timespec_import(&tsk->start_time, ti->cpt_starttime);
+		set_normalized_timespec(&tsk->start_time,
+				tsk->start_time.tv_sec +
+				VE_TASK_INFO(tsk)->owner_env->start_timespec.tv_sec,
+				tsk->start_time.tv_nsec +
+				VE_TASK_INFO(tsk)->owner_env->start_timespec.tv_nsec);
+		tsk->nvcsw = ti->cpt_nvcsw;
+		tsk->nivcsw = ti->cpt_nivcsw;
+		tsk->min_flt = ti->cpt_min_flt;
+		tsk->maj_flt = ti->cpt_maj_flt;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,8)
+		/* On old kernels the children's accounting lives per-task */
+		tsk->cutime = ti->cpt_cutime;
+		tsk->cstime = ti->cpt_cstime;
+		tsk->cnvcsw = ti->cpt_cnvcsw;
+		tsk->cnivcsw = ti->cpt_cnivcsw;
+		tsk->cmin_flt = ti->cpt_cmin_flt;
+		tsk->cmaj_flt = ti->cpt_cmaj_flt;
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,9)
+		/* Build-time-ish assertion: image rlimit array large enough */
+		if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+			__asm__("undefined\n");
+
+		for (i=0; i<RLIM_NLIMITS; i++) {
+			tsk->rlim[i].rlim_cur = ti->cpt_rlim_cur[i];
+			tsk->rlim[i].rlim_max = ti->cpt_rlim_max[i];
+		}
+#endif
+
+		/* TLS descriptors (3 slots saved in the image) */
+		for (i=0; i<3; i++) {
+			if (i >= GDT_ENTRY_TLS_ENTRIES) {
+				eprintk_ctx("too many tls descs\n");
+			} else {
+#ifndef CONFIG_X86_64
+				tsk->thread.tls_array[i].a = ti->cpt_tls[i]&0xFFFFFFFF;
+				tsk->thread.tls_array[i].b = ti->cpt_tls[i]>>32;
+#else
+				tsk->thread.tls_array[i] = ti->cpt_tls[i];
+#endif
+			}
+		}
+
+		tsk->used_math = 0;
+
+		/* Walk the variable objects that follow the task image:
+		 * FPU state, last siginfo, registers, sigaltstack, and --
+		 * always last -- queued private siginfo records. */
+		b = (void *)(ti+1);
+		while ((void*)b < ((void*)ti) + ti->cpt_next) {
+			/* Siginfo objects are at the end of obj array */
+			if (b->cpt_object == CPT_OBJ_SIGINFO) {
+				struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env);
+				restore_sigqueue(tsk, &tsk->pending, (unsigned long)b, (unsigned long)ti + ti->cpt_next);
+				set_exec_env(env);
+				break;
+			}
+
+			switch (b->cpt_object) {
+			case CPT_OBJ_BITS:
+				/* FPU state: fxsave vs legacy fsave layout
+				 * must match the destination CPU's ability */
+				if (b->cpt_content == CPT_CONTENT_X86_FPUSTATE &&
+				    cpu_has_fxsr) {
+					memcpy(&tsk->thread.i387,
+					       (void*)b + b->cpt_hdrlen,
+					       sizeof(struct i387_fxsave_struct));
+					tsk->used_math = ti->cpt_used_math;
+				}
+#ifndef CONFIG_X86_64
+				else if (b->cpt_content == CPT_CONTENT_X86_FPUSTATE_OLD &&
+					 !cpu_has_fxsr) {		
+					memcpy(&tsk->thread.i387,
+					       (void*)b + b->cpt_hdrlen,
+					       sizeof(struct i387_fsave_struct));
+					tsk->used_math = ti->cpt_used_math;
+				}
+#endif
+				break;
+			case CPT_OBJ_LASTSIGINFO:
+				/* remembered for the rlsi hook set up below */
+				lsi = (void*)b;
+				break;
+			case CPT_OBJ_X86_REGS:
+			case CPT_OBJ_X86_64_REGS:
+				if (restore_registers(tsk, regs, ti, (void*)b)) {
+					eprintk_ctx("cannot restore registers: image is corrupted\n");
+					return -EINVAL;
+				}
+				break;
+			case CPT_OBJ_SIGALTSTACK: {
+				struct cpt_sigaltstack_image *sas;
+				sas = (struct cpt_sigaltstack_image *)b;
+				tsk->sas_ss_sp = sas->cpt_stack;
+				tsk->sas_ss_size = sas->cpt_stacksize;
+				break;
+			    }
+			}
+			b = ((void*)b) + b->cpt_next;
+		}
+
+		/* ppid != rppid means the task was ptraced: re-link it under
+		 * its real parent and park it on the old parent's
+		 * ptrace_children list */
+		if (ti->cpt_ppid != ti->cpt_rppid) {
+			task_t *parent;
+			struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env);
+			write_lock_irq(&tasklist_lock);
+			parent = find_task_by_pid_ve(ti->cpt_ppid);
+			if (parent && parent != tsk->parent) {
+				list_add(&tsk->ptrace_list, &tsk->parent->ptrace_children);
+				REMOVE_LINKS(tsk);
+				tsk->parent = parent;
+				SET_LINKS(tsk);
+			}
+			write_unlock_irq(&tasklist_lock);
+			set_exec_env(env);
+		}
+
+		tsk->ptrace_message = ti->cpt_ptrace_message;
+		tsk->pn_state = ti->cpt_pn_state;
+		tsk->stopped_state = ti->cpt_stopped_state;
+		tsk->sigsuspend_state = ti->cpt_sigsuspend_state;
+		tsk->thread_info->flags = ti->cpt_thrflags;
+
+#ifdef CONFIG_X86_64
+		tsk->thread_info->flags |= _TIF_FORK;
+		if (!ti->cpt_64bit)
+			tsk->thread_info->flags |= _TIF_IA32;
+#endif
+
+#ifndef CONFIG_X86_64
+		/* (new)select stores an absolute-ish timeout in user memory;
+		 * rewind it by the wall-clock time spent checkpointed so the
+		 * restarted syscall does not oversleep. */
+		do {
+			if (regs->orig_eax == __NR__newselect && regs->edi) {
+				struct timeval tv;
+				if (access_process_vm(tsk, regs->edi, &tv, 
+						sizeof(tv), 0) != sizeof(tv)) {
+					wprintk_ctx("task %d/%d(%s): Error 1 in access_process_vm: edi %ld\n",
+						virt_pid(tsk), tsk->pid, tsk->comm,
+					       regs->edi);
+					break;
+				}
+				dprintk_ctx("task %d/%d(%s): Old timeval in newselect: %ld.%ld\n",
+				       virt_pid(tsk), tsk->pid, tsk->comm,
+				       tv.tv_sec, tv.tv_usec);
+				tv.tv_sec -= ctx->delta_time.tv_sec;
+				if (tv.tv_usec < ctx->delta_time.tv_nsec / 1000) {
+					tv.tv_usec += 1000000 - ctx->delta_time.tv_nsec / 1000;
+					tv.tv_sec--;
+				} else {
+					tv.tv_usec -= ctx->delta_time.tv_nsec / 1000;
+				}
+				if (tv.tv_sec < 0) {
+					tv.tv_sec = 0;
+					tv.tv_usec = 0;
+				}
+				dprintk_ctx("task %d/%d(%s): New timeval in newselect: %ld.%ld\n",
+					virt_pid(tsk), tsk->pid, tsk->comm,
+				       tv.tv_sec, tv.tv_usec);
+				if (access_process_vm(tsk, regs->edi, &tv, 
+						sizeof(tv), 1) != sizeof(tv)) {
+					wprintk_ctx("task %d/%d(%s): Error 1 in access_process_vm write: edi %ld\n",
+						virt_pid(tsk), tsk->pid, tsk->comm, regs->edi);
+				}
+				
+			} else if (regs->orig_eax == __NR_select && regs->edi) {
+				/* old select(): args live in a struct pointed
+				 * to by ebx; the timeval is behind a.tvp */
+				struct {
+					unsigned long n;
+					fd_set __user *inp, *outp, *exp;
+					struct timeval __user *tvp;
+				} a;
+				struct timeval tv;
+				if (access_process_vm(tsk, regs->ebx, &a, 
+						sizeof(a), 0) != sizeof(a)) {
+					wprintk_ctx("task %d: Error 2 in access_process_vm\n", tsk->pid);
+					break;
+				}
+				if (access_process_vm(tsk, (unsigned long)a.tvp,
+						&tv, sizeof(tv), 0) != sizeof(tv)) {
+					wprintk_ctx("task %d: Error 3 in access_process_vm\n", tsk->pid);
+					break;
+				}
+				dprintk_ctx("task %d: Old timeval in select: %ld.%ld\n",
+					tsk->pid, tv.tv_sec, tv.tv_usec);
+				tv.tv_sec -= ctx->delta_time.tv_sec;
+				if (tv.tv_usec < ctx->delta_time.tv_nsec / 1000) {
+					tv.tv_usec += 1000000 - ctx->delta_time.tv_nsec / 1000;
+					tv.tv_sec--;
+				} else {
+					tv.tv_usec -= ctx->delta_time.tv_nsec / 1000;
+				}
+				if (tv.tv_sec < 0) {
+					tv.tv_sec = 0;
+					tv.tv_usec = 0;
+				}
+				dprintk_ctx("task %d: New timeval in select: %ld.%ld\n",
+					tsk->pid, tv.tv_sec, tv.tv_usec);
+				if (access_process_vm(tsk, (unsigned long)a.tvp,
+						&tv, sizeof(tv), 1) != sizeof(tv)) {
+					wprintk_ctx("task %d: Error 3 in access_process_vm write\n", tsk->pid);
+				}
+			}
+		} while (0);
+#endif
+
+		/* Hooks are pushed in reverse execution order: the one added
+		 * last runs first when the task resumes. */
+		if (!tsk->exit_state && (long)SYSCALL_NR(regs) >= 0) {
+			if (SYSCALL_RETVAL(regs) == -ERESTARTSYS ||
+			    SYSCALL_RETVAL(regs) == -ERESTARTNOINTR ||
+			    SYSCALL_RETVAL(regs) == -ERESTARTNOHAND ||
+			    SYSCALL_RETVAL(regs) == -ERESTART_RESTARTBLOCK ||
+			    syscall_is(tsk,regs,pause) ||
+			    (syscall_is(tsk,regs,rt_sigtimedwait) &&
+			     (SYSCALL_RETVAL(regs) == -EAGAIN || SYSCALL_RETVAL(regs) == -EINTR)) ||
+			    (syscall_is(tsk,regs,futex) &&
+			     (SYSCALL_RETVAL(regs) == -EINTR)))
+				add_hook(tsk, ret_restart_sys, 0, &hooks);
+		}
+
+		if (tsk->sigsuspend_state) {
+			dprintk_ctx("sigsuspend\n");
+			add_hook(tsk, ret_finish_sigsuspend, 0, &hooks);
+		}
+
+		if (lsi || tsk->pn_state) {
+			/* ... -> ptrace_notify()
+			 * or
+			 * ... -> do_signal() -> get_signal_to_deliver() ->
+			 *   ptrace stop
+			 */
+			tsk->last_siginfo = add_hook(tsk, ret_last_siginfo, sizeof(siginfo_t), &hooks);
+			memset(tsk->last_siginfo, 0, sizeof(siginfo_t));
+			if (lsi)
+				decode_siginfo(tsk->last_siginfo, lsi);
+		}
+
+		tsk->ptrace = ti->cpt_ptrace;
+		tsk->flags = ti->cpt_flags & ~PF_FROZEN;
+		clear_tsk_thread_flag(tsk, TIF_FREEZE);
+		tsk->exit_signal = ti->cpt_exit_signal;
+
+		if (tsk->stopped_state) {
+			dprintk_ctx("finish_stop\n");
+			if (ti->cpt_state != TASK_STOPPED)
+				eprintk_ctx("Hellooo, state is %u\n", (unsigned)ti->cpt_state);
+			add_hook(tsk, ret_finish_stop, 0, &hooks);
+		}
+
+		if (!tsk->exit_state &&
+		    (ti->cpt_set_tid || ti->cpt_clear_tid)) {
+			unsigned long *ptr = add_hook(tsk, ret_child_tid, sizeof(unsigned long)*2, &hooks);
+			ptr[0] = ti->cpt_clear_tid;
+			ptr[1] = ti->cpt_set_tid;
+			dprintk_ctx("settids\n");
+		}
+
+#ifdef CONFIG_X86_64
+		/* No hooks and not in a syscall: still need ret_from_fork2
+		 * so the fresh task leaves the kernel cleanly */
+		if (!hooks && (long)SYSCALL_NR(regs) < 0) {
+			extern void ret_from_fork2(void);
+			ESP(tsk) -= sizeof(unsigned long);
+			*(unsigned long*)ESP(tsk) = (unsigned long)ret_from_fork2;
+			tsk->thread_info->flags |= _TIF_RESUME;
+		}
+#else
+		/* i386 always enters via pre_ret_from_fork */
+		tsk->thread.esp -= 4;
+		*(__u32*)tsk->thread.esp = tsk->thread.eip;
+		tsk->thread.eip = (unsigned long)pre_ret_from_fork;
+#endif
+
+		if (ti->cpt_state == TASK_TRACED)
+			tsk->state = TASK_TRACED;
+		else if (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD)) {
+			tsk->it_virt_value = 0;
+			tsk->it_prof_value = 0;
+			if (tsk->state != EXIT_DEAD)
+				eprintk_ctx("oops, schedule() did not make us dead\n");
+		}
+
+		/* Re-arm ITIMER_REAL, rebased by the time spent frozen;
+		 * a timer that already expired fires one tick from now */
+		if (ti->cpt_it_real_value &&
+		    !(ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
+			unsigned long tmo;
+			s64 val = ti->cpt_it_real_value;
+			if (ctx->image_version < CPT_VERSION_9)
+				val *= TICK_NSEC;
+			val -= ctx->delta_nsec;
+			if (val <= 0)
+				val = TICK_NSEC;
+			tmo = _ns_to_jiffies(val + TICK_NSEC - 1);
+			mod_timer(&tsk->real_timer, (unsigned long)ctx->cpt_jiffies64 + tmo);
+			dprintk_ctx("itimer " CPT_FID " +%lu %lu %lu %lu %lu %lu\n", CPT_TID(tsk),
+				tmo,
+				(unsigned long)ctx->cpt_jiffies64 + tmo, jiffies,
+				(unsigned long)ti->cpt_restart.arg0, timespec_to_jiffies(&ctx->delta_time), (unsigned long)ctx->cpt_jiffies64);
+		}
+
+		module_put(THIS_MODULE);
+	}
+	return 0;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_socket.c linux-2.6.9-ve023stab054/kernel/cpt/rst_socket.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_socket.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_socket.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,806 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/scm.h>
+#include <net/af_unix.h>
+
+#include <ub/ub_mem.h>
+#include <ub/ub_orphan.h>
+#include <ub/ub_orphan.h>
+#include <ub/ub_net.h>
+#include <ub/ub_tcp.h>
+
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+
+#include "cpt_syscalls.h"
+
+
+/* Restore the generic struct sock fields of @sk from the checkpoint
+ * image @si.  If the sock still has a struct socket attached, the
+ * socket-level flags/state are restored as well.  Timeout fields saved
+ * as CPT_NULL encode "infinite" and map back to MAX_SCHEDULE_TIMEOUT.
+ * @pos and @ctx are unused here.  Always returns 0.
+ */
+static int setup_sock_common(struct sock *sk, struct cpt_sock_image *si,
+			     loff_t pos, struct cpt_context *ctx)
+{
+	if (sk->sk_socket) {
+		/* Not orphaned: restore socket-level bits as well. */
+		sk->sk_socket->passcred = si->cpt_passcred;
+		sk->sk_socket->flags = si->cpt_ssflags;
+		sk->sk_socket->state = si->cpt_sstate;
+	}
+	sk->sk_reuse = si->cpt_reuse;
+	sk->sk_shutdown = si->cpt_shutdown;
+	sk->sk_userlocks = si->cpt_userlocks;
+	sk->sk_no_check = si->cpt_no_check;
+	sk->sk_debug = si->cpt_debug;
+	sk->sk_rcvtstamp = si->cpt_rcvtstamp;
+	sk->sk_localroute = si->cpt_localroute;
+	sk->sk_protocol = si->cpt_protocol;
+	sk->sk_err = si->cpt_err;
+	sk->sk_err_soft = si->cpt_err_soft;
+	sk->sk_priority = si->cpt_priority;
+	sk->sk_rcvlowat = si->cpt_rcvlowat;
+	sk->sk_rcvtimeo = si->cpt_rcvtimeo;
+	if (si->cpt_rcvtimeo == CPT_NULL)
+		sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+	sk->sk_sndtimeo = si->cpt_sndtimeo;
+	if (si->cpt_sndtimeo == CPT_NULL)
+		sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+	sk->sk_rcvbuf = si->cpt_rcvbuf;
+	sk->sk_sndbuf = si->cpt_sndbuf;
+	sk->sk_bound_dev_if = si->cpt_bound_dev_if;
+	sk->sk_flags = si->cpt_flags;
+	sk->sk_lingertime = si->cpt_lingertime;
+	if (si->cpt_lingertime == CPT_NULL)
+		sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
+	sk->sk_peercred.pid = si->cpt_peer_pid;
+	sk->sk_peercred.uid = si->cpt_peer_uid;
+	sk->sk_peercred.gid = si->cpt_peer_gid;
+	cpt_timeval_import(&sk->sk_stamp, si->cpt_stamp);
+	return 0;
+}
+
+/* Obtain a referenced struct file for @sock.  sock_map_fd() both
+ * allocates a descriptor and attaches a file; we only want the file,
+ * so grab a reference to it and immediately close the descriptor.
+ * Returns the file or an ERR_PTR on failure.
+ */
+static struct file *sock_mapfile(struct socket *sock)
+{
+	struct file *file;
+	int fd = sock_map_fd(sock);
+
+	if (fd < 0)
+		return ERR_PTR(fd);
+
+	file = sock->file;
+	get_file(file);
+	sc_close(fd);
+	return file;
+}
+
+/* We assume that /tmp exists and is writable.
+ * Earlier versions assumed that listen() would autobind the socket.
+ * It does not do so for AF_UNIX, for an obvious reason: a socket in
+ * the abstract namespace stays accessible, unlike a socket bound to
+ * a deleted filesystem object.
+ */
+
+/* Pick a random, currently non-existing name of the form
+ * /tmp/SOCK.%08x to serve as a temporary bind target for a unix
+ * socket whose original path was unlinked.  Up to 100 attempts; the
+ * chosen name is written into the caller-supplied @name buffer.
+ * Returns 0 on success, -ELOOP if no free name was found.
+ */
+static int
+select_deleted_name(char * name, cpt_context_t *ctx)
+{
+	int i;
+
+	for (i=0; i<100; i++) {
+		struct nameidata nd;
+		unsigned int rnd = net_random();
+
+		sprintf(name, "/tmp/SOCK.%08x", rnd);
+
+		/* A failed lookup means the name is free to use. */
+		if (path_lookup(name, 0, &nd) != 0)
+			return 0;
+
+		path_release(&nd);
+	}
+
+	eprintk_ctx("failed to allocate deleted socket inode\n");
+	return -ELOOP;
+}
+
+/* Re-bind a restored AF_UNIX socket to its checkpointed local address.
+ * Three cases: an unnamed/autobound address (nothing to do); a path
+ * unlinked before checkpoint (bind, then unlink again, falling back to
+ * a freshly generated /tmp name if the original path is taken); and a
+ * live path (remove the stale socket inode, bind, then restore the
+ * inode's owner/mode).  Returns 0 or -errno.
+ */
+static int
+bind_unix_socket(struct socket *sock, struct cpt_sock_image *si,
+		 cpt_context_t *ctx)
+{
+	int err;
+	char *name;
+	struct sockaddr* addr;
+	int addrlen;
+	struct sockaddr_un sun;
+	struct nameidata nd;
+
+	/* Address of length <= 2 carries only sa_family: unnamed. */
+	if ((addrlen = si->cpt_laddrlen) <= 2)
+		return 0;
+
+	nd.dentry = NULL;
+	/* The path starts right after sa_family in the saved sockaddr. */
+	name = ((char*)si->cpt_laddr) + 2;
+	addr = (struct sockaddr *)si->cpt_laddr;
+
+	if (name[0]) {
+		if (path_lookup(name, 0, &nd))
+			nd.dentry = NULL;
+
+		if (si->cpt_deleted) {
+			/* Prefer the original name: if the path is free
+			 * and bind succeeds, unlink right away to
+			 * recreate the "bound to deleted inode" state. */
+			if (nd.dentry == NULL &&
+			    sock->ops->bind(sock, addr, addrlen) == 0) {
+				sc_unlink(name);
+				return 0;
+			}
+
+			/* Original path occupied: bind to a fresh
+			 * temporary /tmp name instead. */
+			addr = (struct sockaddr*)&sun;
+			addr->sa_family = AF_UNIX;
+			name = ((char*)addr) + 2;
+			err = select_deleted_name(name, ctx);
+			if (err)
+				goto out;
+			addrlen = 2 + strlen(name);
+		} else if (nd.dentry) {
+			if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) {
+				eprintk_ctx("bind_unix_socket: not a socket dentry\n");
+				err = -EINVAL;
+				goto out;
+			}
+			/* Remove the stale socket inode so bind() can
+			 * recreate it. */
+			sc_unlink(name);
+		}
+	}
+
+	err = sock->ops->bind(sock, addr, addrlen);
+
+	if (!err && name[0]) {
+		if (nd.dentry) {
+			/* Carry the old inode's ownership and
+			 * permissions over to the new socket inode. */
+			sc_chown(name, nd.dentry->d_inode->i_uid,
+				 nd.dentry->d_inode->i_gid);
+			sc_chmod(name, nd.dentry->d_inode->i_mode);
+		}
+		if (si->cpt_deleted)
+			sc_unlink(name);
+	}
+
+out:
+	if (nd.dentry)
+		path_release(&nd);
+	return err;
+}
+
+/* Restore the local address of a non-listening AF_UNIX socket.
+ * A socket without a parent is re-bound from the image; an accepted
+ * child instead shares its listening parent's unix address (taking a
+ * reference and dropping any address it already had), as accept()
+ * would have done.  Returns 0 or the error from bind_unix_socket().
+ */
+static int fixup_unix_address(struct socket *sock, struct cpt_sock_image *si,
+			      struct cpt_context *ctx)
+{
+	struct sock *sk = sock->sk;
+	cpt_object_t *obj;
+	struct sock *parent;
+
+	if (sk->sk_family != AF_UNIX || sk->sk_state == TCP_LISTEN)
+		return 0;
+
+	if (si->cpt_parent == -1)
+		return bind_unix_socket(sock, si, ctx);
+
+	obj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
+	if (!obj)
+		return 0;
+
+	parent = obj->o_obj;
+	if (unix_sk(parent)->addr) {
+		/* Free our own address if we held the last reference,
+		 * then share the parent's. */
+		if (unix_sk(sk)->addr &&
+		    atomic_dec_and_test(&unix_sk(sk)->addr->refcnt))
+			kfree(unix_sk(sk)->addr);
+		atomic_inc(&unix_sk(parent)->addr->refcnt);
+		unix_sk(sk)->addr = unix_sk(parent)->addr;
+	}
+	return 0;
+}
+
+
+/* Second-pass restore of a non-listening socket described by @si into
+ * cpt object @obj: create the kernel socket (and its peer, for a
+ * socketpair), re-attach its file and register it in the file index,
+ * restore common and per-family state, re-queue accepted-but-unread
+ * children onto their listener, and orphan file-less AF_INET sockets.
+ * Returns 0 or -errno.
+ */
+static int open_socket(cpt_object_t *obj, struct cpt_sock_image *si,
+		       struct cpt_context *ctx)
+{
+	int err;
+	struct socket *sock;
+	struct socket *sock2 = NULL;
+	struct file *file;
+	cpt_object_t *fobj;
+	cpt_object_t *pobj = NULL;
+
+	err = sock_create_kern(si->cpt_family, si->cpt_type, si->cpt_protocol,
+			       &sock);
+	if (err)
+		return err;
+
+	if (si->cpt_socketpair) {
+		/* Recreate both ends and connect them in-kernel. */
+		err = sock_create_kern(si->cpt_family, si->cpt_type,
+				       si->cpt_protocol, &sock2);
+		if (err)
+			goto err_out;
+
+		err = sock->ops->socketpair(sock, sock2);
+		if (err < 0)
+			goto err_out;
+
+		/* Socketpair with a peer outside our environment.
+		 * So, we create real half-open pipe and do not worry
+		 * about dead end anymore. */
+		if (si->cpt_peer == -1) {
+			sock_release(sock2);
+			sock2 = NULL;
+		}
+	}
+
+	cpt_obj_setobj(obj, sock->sk, ctx);
+
+	if (si->cpt_file != CPT_NULL) {
+		/* The socket was attached to a file: recreate it and
+		 * register it so the file pass can find it by pos. */
+		file = sock_mapfile(sock);
+		err = PTR_ERR(file);
+		if (IS_ERR(file))
+			goto err_out;
+
+		err = -ENOMEM;
+
+		obj->o_parent = file;
+
+		if ((fobj = cpt_object_add(CPT_OBJ_FILE, file, ctx)) == NULL)
+			goto err_out;
+		cpt_obj_setpos(fobj, si->cpt_file, ctx);
+		cpt_obj_setindex(fobj, si->cpt_index, ctx);
+	}
+
+	if (sock2) {
+		struct file *file2;
+
+		/* The peer's placeholder object was created in pass 1
+		 * of rst_sockets() and must not be filled in yet. */
+		pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_peer, ctx);
+		if (!pobj) BUG();
+		if (pobj->o_obj) BUG();
+		cpt_obj_setobj(pobj, sock2->sk, ctx);
+
+		if (pobj->o_ppos != CPT_NULL) {
+			file2 = sock_mapfile(sock2);
+			err = PTR_ERR(file2);
+			if (IS_ERR(file2))
+				goto err_out;
+
+			err = -ENOMEM;
+			if ((fobj = cpt_object_add(CPT_OBJ_FILE, file2, ctx)) == NULL)
+				goto err_out;
+			cpt_obj_setpos(fobj, pobj->o_ppos, ctx);
+			cpt_obj_setindex(fobj, si->cpt_peer, ctx);
+
+			pobj->o_parent = file2;
+		}
+	}
+
+	setup_sock_common(sock->sk, si, obj->o_pos, ctx);
+	if (sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6) {
+		int saved_reuse = sock->sk->sk_reuse;
+
+		/* Temporarily allow binding to any address/port so the
+		 * original local address can be reclaimed; a failure
+		 * here is tolerated. */
+		inet_sk(sock->sk)->freebind = 1;
+		sock->sk->sk_reuse = 2;
+		if (si->cpt_laddrlen) {
+			err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
+			if (err) {
+				dprintk_ctx("binding failed: %d, do not worry\n", err);
+			}
+		}
+		sock->sk->sk_reuse = saved_reuse;
+		rst_socket_in(si, obj->o_pos, sock->sk, ctx);
+	} else if (sock->sk->sk_family == AF_NETLINK) {
+		err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
+		if (err) {
+			eprintk_ctx("AF_NETLINK binding failed: %d\n", err);
+		}
+		if (si->cpt_raddrlen) {
+			err = sock->ops->connect(sock, (struct sockaddr *)&si->cpt_raddr, si->cpt_raddrlen, O_NONBLOCK);
+			if (err) {
+				eprintk_ctx("oops, AF_NETLINK connect failed: %d\n", err);
+			}
+		}
+	}
+	fixup_unix_address(sock, si, ctx);
+
+	if (sock2) {
+		/* Re-read the image: from here @si describes the peer.
+		 * NOTE(review): unlike earlier failures, this error path
+		 * returns without releasing sock/sock2 — confirm whether
+		 * that is a leak. */
+		err = rst_get_object(CPT_OBJ_SOCKET, pobj->o_pos, si, ctx);
+		if (err)
+			return err;
+		setup_sock_common(sock2->sk, si, pobj->o_pos, ctx);
+		fixup_unix_address(sock2, si, ctx);
+	}
+
+	/* An INET socket with a parent index was accepted but not yet
+	 * picked up by userspace: queue it back onto its listener,
+	 * which then owns sk. */
+	if ((sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6)
+	    && (int)si->cpt_parent != -1) {
+		cpt_object_t *lobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
+		if (lobj && cpt_attach_accept(lobj->o_obj, sock->sk, ctx) == 0)
+			sock->sk = NULL;
+	}
+
+
+	/* An AF_INET socket without a file is a checkpointed orphan
+	 * (e.g. lingering after close): detach it from the socket and
+	 * hand it to the TCP orphan machinery. */
+	if (si->cpt_file == CPT_NULL && sock->sk &&
+	    sock->sk->sk_family == AF_INET) {
+		struct sock *sk = sock->sk;
+
+		if (sk) {
+			sock->sk = NULL;
+
+			local_bh_disable();
+			bh_lock_sock(sk);
+			if (sock_owned_by_user(sk))
+				eprintk_ctx("oops, sock is locked by user\n");
+
+			sock_hold(sk);
+			sock_orphan(sk);
+			tcp_inc_orphan_count(sk);
+			bh_unlock_sock(sk);
+			local_bh_enable();
+			sock_put(sk);
+			dprintk_ctx("orphaning socket %p\n", sk);
+		}
+	}
+
+	/* No file and no sk left: the struct socket itself is unused. */
+	if (si->cpt_file == CPT_NULL && sock->sk == NULL)
+		sock_release(sock);
+
+	return 0;
+
+err_out:
+	if (sock2)
+		sock_release(sock2);
+	sock_release(sock);
+	return err;
+}
+
+/* First-pass restore of a listening socket at image position @pos:
+ * create it, re-bind the saved local address (via bind_unix_socket()
+ * for AF_UNIX, with freebind for INET), put it back into listen state,
+ * recreate its file, register both in the object index and restore the
+ * common state and (for INET) the syn-wait queue.  Returns 0 or -errno.
+ */
+static int open_listening_socket(loff_t pos, struct cpt_sock_image *si,
+				 struct cpt_context *ctx)
+{
+	int err;
+	struct socket *sock;
+	struct file *file;
+	cpt_object_t *obj, *fobj;
+
+	err = sock_create_kern(si->cpt_family, si->cpt_type, si->cpt_protocol,
+			       &sock);
+	if (err) {
+		eprintk_ctx("open_listening_socket: sock_create_kern: %d\n", err);
+		return err;
+	}
+
+	/* Allow reclaiming the original address/port. */
+	sock->sk->sk_reuse = 2;
+	sock->sk->sk_bound_dev_if = si->cpt_bound_dev_if;
+
+	if (sock->sk->sk_family == AF_UNIX) {
+		err = bind_unix_socket(sock, si, ctx);
+	} else if (si->cpt_laddrlen) {
+		if (sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6)
+			inet_sk(sock->sk)->freebind = 1;
+
+		err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
+
+		if (err) {
+			eprintk_ctx("open_listening_socket: bind: %d\n", err);
+			goto err_out;
+		}
+	}
+
+	err = sock->ops->listen(sock, si->cpt_max_ack_backlog);
+	if (err) {
+		eprintk_ctx("open_listening_socket: listen: %d, %Ld, %d\n", err, pos, si->cpt_deleted);
+		goto err_out;
+	}
+
+	/* Now we may access socket body directly and fixup all the things. */
+
+	file = sock_mapfile(sock);
+	err = PTR_ERR(file);
+	if (IS_ERR(file)) {
+		eprintk_ctx("open_listening_socket: map: %d\n", err);
+		goto err_out;
+	}
+
+	err = -ENOMEM;
+	if ((fobj = cpt_object_add(CPT_OBJ_FILE, file, ctx)) == NULL)
+		goto err_out;
+	if ((obj = cpt_object_add(CPT_OBJ_SOCKET, sock->sk, ctx)) == NULL)
+		goto err_out;
+	cpt_obj_setpos(obj, pos, ctx);
+	cpt_obj_setindex(obj, si->cpt_index, ctx);
+	obj->o_parent = file;
+	cpt_obj_setpos(fobj, si->cpt_file, ctx);
+	cpt_obj_setindex(fobj, si->cpt_index, ctx);
+
+	setup_sock_common(sock->sk, si, pos, ctx);
+
+	/* Re-populate half-open (SYN_RECV) connections. */
+	if (si->cpt_family == AF_INET || si->cpt_family == AF_INET6)
+		rst_restore_synwait_queue(sock->sk, si, pos, ctx);
+
+	return 0;
+
+err_out:
+	sock_release(sock);
+	return err;
+}
+
+/* Restore a socket attribute object found in a socket's image stream
+ * at *pos_p — currently the attached socket filter (CPT_OBJ_SKFILTER):
+ * read the BPF program into a new sk_filter and install it, releasing
+ * any previous filter.  Advances *pos_p past the object.
+ * Returns 0 or -errno.
+ */
+int rst_sock_attr(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx)
+{
+	int err;
+	struct sk_filter *fp, *old_fp; 
+	loff_t pos = *pos_p;
+	struct cpt_obj_bits v;
+	
+	err = rst_get_object(CPT_OBJ_SKFILTER, pos, &v, ctx);
+	if (err)
+		return err;
+
+	*pos_p += v.cpt_next;
+
+	/* Payload must be a whole number of BPF instructions. */
+	if (v.cpt_size % sizeof(struct sock_filter))
+		return -EINVAL;
+
+	fp = sock_kmalloc(sk, v.cpt_size+sizeof(*fp), GFP_KERNEL_UBC);
+	if (fp == NULL)
+		return -ENOMEM;
+	atomic_set(&fp->refcnt, 1);
+	fp->len = v.cpt_size/sizeof(struct sock_filter);
+
+	err = ctx->pread(fp->insns, v.cpt_size, ctx, pos+v.cpt_hdrlen);
+	if (err) {
+		sk_filter_release(sk, fp);
+		return err;
+	}
+
+	/* Swap in the new filter, dropping the old one if present. */
+	old_fp = sk->sk_filter;
+	sk->sk_filter = fp;
+	if (old_fp)
+		sk_filter_release(sk, old_fp);
+	return 0;
+}
+
+/* Rebuild one sk_buff from the image at *pos_p, advancing *pos_p past
+ * the object.  Optionally reports the checkpointed owner index and
+ * queue id through @owner/@queue.  The packet data is restored from a
+ * trailing CPT_OBJ_BITS object; passed descriptors (SCM_RIGHTS) come
+ * from CPT_OBJ_FILEDESC objects and are reopened into a scm_fp_list
+ * hung off UNIXCB.  Returns the skb or an ERR_PTR on failure.
+ */
+struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx)
+{
+	int err;
+	struct sk_buff *skb;
+	struct cpt_skb_image v;
+	loff_t pos = *pos_p;
+	struct scm_fp_list *fpl = NULL;
+
+	err = rst_get_object(CPT_OBJ_SKB, pos, &v, ctx);
+	if (err)
+		return ERR_PTR(err);
+	*pos_p = pos + v.cpt_next;
+
+	if (owner)
+		*owner = v.cpt_owner;
+	if (queue)
+		*queue = v.cpt_queue;
+
+	/* Recreate the buffer with the original head/tail room and
+	 * header offsets. */
+	skb = alloc_skb(v.cpt_len + v.cpt_hspace + v.cpt_tspace, GFP_KERNEL);
+	if (skb == NULL)
+		return ERR_PTR(-ENOMEM);
+	skb_reserve(skb, v.cpt_hspace);
+	skb_put(skb, v.cpt_len);
+	skb->h.raw = skb->head + v.cpt_h;
+	skb->nh.raw = skb->head + v.cpt_nh;
+	skb->mac.raw = skb->head + v.cpt_mac;
+	memcpy(skb->cb, v.cpt_cb, sizeof(skb->cb));
+	skb->mac_len = v.cpt_mac_len;
+
+	skb->csum = v.cpt_csum;
+	skb->local_df = v.cpt_local_df;
+	skb->pkt_type = v.cpt_pkt_type;
+	skb->ip_summed = v.cpt_ip_summed;
+	skb->priority = v.cpt_priority;
+	skb->protocol = v.cpt_protocol;
+	skb->security = v.cpt_security;
+	cpt_timeval_import(&skb->stamp, v.cpt_stamp);
+
+	skb_shinfo(skb)->tso_segs = v.cpt_tso_segs;
+	skb_shinfo(skb)->tso_size = v.cpt_tso_size;
+	/* Version-8 images did not carry valid TSO state. */
+	if (ctx->image_version == CPT_VERSION_8) {
+		skb_shinfo(skb)->tso_segs = 1;
+		skb_shinfo(skb)->tso_size = 0;
+	}
+
+	/* Walk the nested objects (payload bits, passed fds). */
+	if (v.cpt_next > v.cpt_hdrlen) {
+		pos = pos + v.cpt_hdrlen;
+		while (pos < *pos_p) {
+			union {
+				struct cpt_obj_bits b;
+				struct cpt_fd_image f;
+			} u;
+
+			err = rst_get_object(-1, pos, &u, ctx);
+			if (err) {
+				kfree_skb(skb);
+				return ERR_PTR(err);
+			}
+			if (u.b.cpt_object == CPT_OBJ_BITS) {
+				/* Payload covers headroom + data. */
+				if (u.b.cpt_size != v.cpt_hspace + skb->len) {
+					eprintk_ctx("invalid skb image %u != %u + %u\n", u.b.cpt_size, v.cpt_hspace, skb->len);
+					kfree_skb(skb);
+					return ERR_PTR(-EINVAL);
+				}
+
+				err = ctx->pread(skb->head, u.b.cpt_size, ctx, pos+u.b.cpt_hdrlen);
+				if (err) {
+					kfree_skb(skb);
+					return ERR_PTR(err);
+				}
+			} else if (u.f.cpt_object == CPT_OBJ_FILEDESC) {
+				if (!fpl) {
+					fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+					if (!fpl) {
+						kfree_skb(skb);
+						return ERR_PTR(-ENOMEM);
+					}
+					fpl->count = 0;
+					UNIXCB(skb).fp = fpl;
+				}
+				/* Files that fail to restore are simply
+				 * skipped (count is not advanced). */
+				fpl->fp[fpl->count] = rst_file(u.f.cpt_file, -1, ctx);
+				if (!IS_ERR(fpl->fp[fpl->count]))
+					fpl->count++;
+			}
+			pos += u.b.cpt_next;
+		}
+	}
+
+	return skb;
+}
+
+/* Refill the receive queue of AF_UNIX socket @sk from its image.
+ * Each skb is charged (skb_set_owner_w) to the sock that originally
+ * held it: the saved owner index if present, else our peer.  For a
+ * listening socket the queued skbs are embryo connections, so the
+ * temporary struct socket that sock_create_kern() attached to each
+ * child is detached and released.  Returns 0 or -errno.
+ */
+static int restore_unix_rqueue(struct sock *sk, struct cpt_sock_image *si,
+			       loff_t pos, struct cpt_context *ctx)
+{
+	loff_t endpos;
+
+	pos = pos + si->cpt_hdrlen;
+	endpos = pos + si->cpt_next;
+	while (pos < endpos) {
+		struct sk_buff *skb;
+		struct sock *owner_sk;
+		__u32 owner;
+
+		skb = rst_skb(&pos, &owner, NULL, ctx);
+		if (IS_ERR(skb)) {
+			/* -EINVAL may mean the object is a socket
+			 * attribute rather than an skb.
+			 * NOTE(review): even after rst_sock_attr()
+			 * succeeds this still returns -EINVAL, ending
+			 * the restore; apparently relies on attribute
+			 * objects coming last — confirm. */
+			if (PTR_ERR(skb) == -EINVAL) {
+				int err;
+
+				err = rst_sock_attr(&pos, sk, ctx);
+				if (err)
+					return err;
+			}
+			return PTR_ERR(skb);
+		}
+
+		owner_sk = unix_peer(sk);
+		if (owner != -1) {
+			cpt_object_t *pobj;
+			pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, owner, ctx);
+			if (pobj == NULL) {
+				eprintk_ctx("orphan af_unix skb?\n");
+				kfree_skb(skb);
+				continue;
+			}
+			owner_sk = pobj->o_obj;
+		}
+		if (owner_sk == NULL) {
+			dprintk_ctx("orphan af_unix skb 2?\n");
+			kfree_skb(skb);
+			continue;
+		}
+		skb_set_owner_w(skb, owner_sk);
+		/* skbs carrying passed fds need the fd-aware destructor. */
+		if (UNIXCB(skb).fp)
+			skb->destructor = unix_destruct_fds;
+		skb_queue_tail(&sk->sk_receive_queue, skb);
+		if (sk->sk_state == TCP_LISTEN) {
+			/* Embryo connection: drop the temporary struct
+			 * socket created for the child sock. */
+			struct socket *sock = skb->sk->sk_socket;
+			if (sock == NULL) BUG();
+			if (sock->file) BUG();
+			skb->sk->sk_socket = NULL;
+			skb->sk->sk_sleep = NULL;
+			sock->sk = NULL;
+			sock_release(sock);
+		}
+	}
+	return 0;
+}
+
+
+/* All sockets are created before we start opening files. */
+
+/* Restore the CPT_SECT_SOCKET section.  Pass 1 walks the section,
+ * opening listening sockets immediately and creating placeholder cpt
+ * objects (with image pos/index) for everything else; pass 2 then
+ * opens the remaining sockets via open_socket().  Runs before file
+ * restore so socket files can be found by image position.
+ * Returns 0 or -errno.
+ */
+int rst_sockets(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_SOCKET];
+	loff_t endsec;
+	cpt_object_t *obj;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err) {
+		eprintk_ctx("rst_sockets: ctx->pread: %d\n", err);
+		return err;
+	}
+	if (h.cpt_section != CPT_SECT_SOCKET || h.cpt_hdrlen < sizeof(h)) {
+		eprintk_ctx("rst_sockets: hdr err\n");
+		return -EINVAL;
+	}
+
+	/* The first pass: we create socket index and open listening sockets. */
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		struct cpt_sock_image *sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, sec, sbuf, ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: rst_get_object: %d\n", err);
+			cpt_release_buf(ctx);
+			return err;
+		}
+		if (sbuf->cpt_state == TCP_LISTEN) {
+			err = open_listening_socket(sec, sbuf, ctx); 
+			cpt_release_buf(ctx);
+			if (err) {
+				eprintk_ctx("rst_sockets: open_listening_socket: %d\n", err);
+				return err;
+			}
+		} else {
+			/* NOTE(review): sbuf is still read below after
+			 * cpt_release_buf(); this relies on the buffer
+			 * staying valid until the next cpt_get_buf() —
+			 * confirm. */
+			cpt_release_buf(ctx);
+			obj = alloc_cpt_object(GFP_KERNEL, ctx);
+			if (obj == NULL)
+				return -ENOMEM;
+			cpt_obj_setindex(obj, sbuf->cpt_index, ctx);
+			cpt_obj_setpos(obj, sec, ctx);
+			obj->o_ppos  = sbuf->cpt_file;
+			intern_cpt_object(CPT_OBJ_SOCKET, obj, ctx);
+		}
+		sec += sbuf->cpt_next;
+	}
+
+	/* Pass 2: really restore sockets */
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct cpt_sock_image *sbuf;
+		/* Skip sockets already materialized in pass 1. */
+		if (obj->o_obj != NULL)
+			continue;
+		sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: rst_get_object: %d\n", err);
+			cpt_release_buf(ctx);
+			return err;
+		}
+		if (sbuf->cpt_state == TCP_LISTEN) BUG();
+		err = open_socket(obj, sbuf, ctx); 
+		cpt_release_buf(ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: open_socket: %d\n", err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/* Restore the CPT_SECT_ORPHANS section: sockets with no owning file
+ * (e.g. lingering TCP sockets).  Each is restored through open_socket()
+ * with a throwaway cpt object, which open_socket() then hands over to
+ * the orphan machinery.  Returns 0 or -errno.
+ */
+int rst_orphans(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_ORPHANS];
+	loff_t endsec;
+	cpt_object_t *obj;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_ORPHANS || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		struct cpt_sock_image *sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, sec, sbuf, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
+		if (obj == NULL) {
+			cpt_release_buf(ctx);
+			return -ENOMEM;
+		}
+		obj->o_pos = sec;
+		obj->o_ppos  = sbuf->cpt_file;
+		err = open_socket(obj, sbuf, ctx);
+		dprintk_ctx("Restoring orphan: %d\n", err);
+		/* The object was only needed to drive open_socket(). */
+		free_cpt_object(obj, ctx);
+		cpt_release_buf(ctx);
+		if (err)
+			return err;
+		/* NOTE(review): sbuf->cpt_next is read after
+		 * cpt_release_buf() — confirm buffer lifetime. */
+		sec += sbuf->cpt_next;
+	}
+
+	return 0;
+}
+
+
+/* Pass 3: I understand, this is not funny anymore :-),
+ * but we have to do another pass to establish links between
+ * unpaired AF_UNIX SOCK_DGRAM sockets and to restore AF_UNIX
+ * skb queues with proper skb->sk links.
+ *
+ * This could be done at the end of rst_sockets(), but we defer
+ * restoring af_unix queues until the end of restoring files to
+ * make restoring passed FDs cleaner.
+ */
+
+/* Pass 3 of socket restore, run after files have been restored:
+ * refill AF_UNIX receive queues (which may carry passed fds) and hook
+ * up unpaired SOCK_DGRAM peers.  Finally restores orphaned sockets.
+ * Returns 0 or -errno.
+ */
+int rst_sockets_complete(struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct cpt_sock_image *sbuf;
+		struct sock *sk = obj->o_obj;
+		struct sock *peer;
+
+		if (!sk) BUG();
+
+		if (sk->sk_family != AF_UNIX)
+			continue;
+
+		sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+
+		/* Nested objects after the header are the queued skbs. */
+		if (sbuf->cpt_next > sbuf->cpt_hdrlen)
+			restore_unix_rqueue(sk, sbuf, obj->o_pos, ctx);
+
+		cpt_release_buf(ctx);
+
+		/* Datagram socket still missing its peer link: re-read
+		 * the image and connect it to the saved peer. */
+		if (sk->sk_type == SOCK_DGRAM && unix_peer(sk) == NULL) {
+			cpt_object_t *pobj;
+
+			sbuf = cpt_get_buf(ctx);
+			err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
+			if (err) {
+				cpt_release_buf(ctx);
+				return err;
+			}
+
+			if (sbuf->cpt_peer != -1) {
+				pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, sbuf->cpt_peer, ctx);
+				if (pobj) {
+					peer = pobj->o_obj;
+					sock_hold(peer);
+					unix_peer(sk) = peer;
+				}
+			}
+			cpt_release_buf(ctx);
+		}
+	}
+
+	rst_orphans(ctx);
+
+	return 0;
+}
+
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_socket_in.c linux-2.6.9-ve023stab054/kernel/cpt/rst_socket_in.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_socket_in.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_socket_in.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,522 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <linux/jhash.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/ipv6.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+
+/* Rebase a checkpointed 32-bit timeout onto the current jiffies
+ * counter, treating the saved value as a signed delta. */
+static inline unsigned long jiffies_import(__u32 tmo)
+{
+	return jiffies + (long)(__s32)tmo;
+}
+
+/* Rebase a checkpointed 32-bit TCP timestamp offset onto the current
+ * jiffies counter (wrapping 32-bit arithmetic). */
+static inline __u32 tcp_jiffies_import(__u32 tmo)
+{
+	return tmo + (__u32)jiffies;
+}
+
+
+/* Refill the queues of an INET socket from its image.  For stream
+ * (TCP) sockets, skbs go to the receive, out-of-order or write queue
+ * according to the saved queue type, with buffer-space accounting
+ * charged to the sock.  For datagram sockets, write-queue skbs are
+ * kept only while a cork is pending.  Returns 0 or -errno.
+ */
+static int restore_queues(struct sock *sk, struct cpt_sock_image *si,
+			  loff_t pos, struct cpt_context *ctx)
+{
+	loff_t endpos;
+
+	pos = pos + si->cpt_hdrlen;
+	endpos = pos + si->cpt_next;
+	while (pos < endpos) {
+		struct sk_buff *skb;
+		__u32 type;
+
+		skb = rst_skb(&pos, NULL, &type, ctx);
+		if (IS_ERR(skb)) {
+			/* -EINVAL may mean a socket-attribute object
+			 * rather than an skb; restore it, then stop
+			 * (see matching NOTE in restore_unix_rqueue). */
+			if (PTR_ERR(skb) == -EINVAL) {
+				int err;
+
+				err = rst_sock_attr(&pos, sk, ctx);
+				if (err)
+					return err;
+			}
+			return PTR_ERR(skb);
+		}
+
+		if (sk->sk_type == SOCK_STREAM) {
+			if (type == CPT_SKB_RQ) {
+				sk_stream_set_owner_r(skb, sk);
+				ub_tcprcvbuf_charge_forced(sk, skb);
+				skb_queue_tail(&sk->sk_receive_queue, skb);
+			} else if (type == CPT_SKB_OFOQ) {
+				struct tcp_opt *tp = tcp_sk(sk);
+				sk_stream_set_owner_r(skb, sk);
+				ub_tcprcvbuf_charge_forced(sk, skb);
+				skb_queue_tail(&tp->out_of_order_queue, skb);
+			} else if (type == CPT_SKB_WQ) {
+				sk->sk_wmem_queued += skb->truesize;
+				sk->sk_forward_alloc -= skb->truesize;
+				ub_tcpsndbuf_charge_forced(sk, skb);
+				skb_queue_tail(&sk->sk_write_queue, skb);
+			} else {
+				wprintk_ctx("strange stream queue type %u\n", type);
+				kfree_skb(skb);
+			}
+		} else {
+			if (type == CPT_SKB_RQ) {
+				skb_set_owner_r(skb, sk);
+				skb_queue_tail(&sk->sk_receive_queue, skb);
+			} else if (type == CPT_SKB_WQ) {
+				struct inet_opt *inet = inet_sk(sk);
+				/* Write-queue data is only meaningful
+				 * while a cork is in progress. */
+				if (inet->cork.fragsize) {
+					skb_set_owner_w(skb, sk);
+					skb_queue_tail(&sk->sk_write_queue, skb);
+				} else {
+					eprintk_ctx("cork skb is dropped\n");
+					kfree_skb(skb);
+				}
+			} else {
+				wprintk_ctx("strange dgram queue type %u\n", type);
+				kfree_skb(skb);
+			}
+		}
+	}
+	return 0;
+}
+
+/* Find a restored listening INET socket bound to local port @sport
+ * (network byte order); used to re-attach child sockets whose
+ * listener was not recorded by index in the image.  Returns the
+ * listener or NULL.
+ */
+static struct sock *find_parent(__u16 sport, cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		if (sk &&
+		    sk->sk_state == TCP_LISTEN &&
+		    (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) &&
+		    inet_sk(sk)->sport == sport)
+			return sk;
+	}
+	return NULL;
+}
+
+/* Restore the TCP-specific state of @sk from the image: tcp_opt
+ * sequence/window/congestion/RTT fields, SACK blocks, the send-head
+ * position in the write queue, and — for active connections — the
+ * local port binding, socket hash, and delack/retransmit/keepalive
+ * timers.  Checkpointed jiffies values are rebased onto the current
+ * jiffies counter.  Always returns 0.
+ */
+static int rst_socket_tcp(struct cpt_sock_image *si, loff_t pos, struct sock *sk,
+			  struct cpt_context *ctx)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	tp->pred_flags = si->cpt_pred_flags;
+	tp->rcv_nxt = si->cpt_rcv_nxt;
+	tp->snd_nxt = si->cpt_snd_nxt;
+	tp->snd_una = si->cpt_snd_una;
+	tp->snd_sml = si->cpt_snd_sml;
+	tp->rcv_tstamp = tcp_jiffies_import(si->cpt_rcv_tstamp);
+	tp->lsndtime = tcp_jiffies_import(si->cpt_lsndtime);
+	tp->tcp_header_len = si->cpt_tcp_header_len;
+	tp->ack.pending = si->cpt_ack_pending;
+	tp->ack.quick = si->cpt_quick;
+	tp->ack.pingpong = si->cpt_pingpong;
+	tp->ack.blocked = si->cpt_blocked;
+	tp->ack.ato = si->cpt_ato;
+	tp->ack.timeout = jiffies_import(si->cpt_ack_timeout);
+	tp->ack.lrcvtime = tcp_jiffies_import(si->cpt_lrcvtime);
+	tp->ack.last_seg_size = si->cpt_last_seg_size;
+	tp->ack.rcv_mss = si->cpt_rcv_mss;
+	tp->snd_wl1 = si->cpt_snd_wl1;
+	tp->snd_wnd = si->cpt_snd_wnd;
+	tp->max_window = si->cpt_max_window;
+	tp->pmtu_cookie = si->cpt_pmtu_cookie;
+	tp->mss_cache = si->cpt_mss_cache;
+	tp->mss_cache_std = si->cpt_mss_cache_std;
+	tp->rx_opt.mss_clamp = si->cpt_mss_clamp;
+	tp->ext_header_len = si->cpt_ext_header_len;
+	tp->ext2_header_len = si->cpt_ext2_header_len;
+	tp->ca_state = si->cpt_ca_state;
+	tp->retransmits = si->cpt_retransmits;
+	tp->reordering = si->cpt_reordering;
+	tp->frto_counter = si->cpt_frto_counter;
+	tp->frto_highmark = si->cpt_frto_highmark;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
+	tp->adv_cong = si->cpt_adv_cong;
+#endif
+	tp->defer_accept = si->cpt_defer_accept;
+	tp->backoff = si->cpt_backoff;
+	tp->srtt = si->cpt_srtt;
+	tp->mdev = si->cpt_mdev;
+	tp->mdev_max = si->cpt_mdev_max;
+	tp->rttvar = si->cpt_rttvar;
+	tp->rtt_seq = si->cpt_rtt_seq;
+	tp->rto = si->cpt_rto;
+	/* >= 2.6.9 wraps these counters in a .val member. */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+	tp->packets_out.val = si->cpt_packets_out;
+	tp->left_out.val = si->cpt_left_out;
+	tp->retrans_out.val = si->cpt_retrans_out;
+	tp->lost_out.val = si->cpt_lost_out;
+	tp->sacked_out.val = si->cpt_sacked_out;
+	tp->fackets_out.val = si->cpt_fackets_out;
+#else
+	tp->packets_out = si->cpt_packets_out;
+	tp->left_out = si->cpt_left_out;
+	tp->retrans_out = si->cpt_retrans_out;
+	tp->lost_out = si->cpt_lost_out;
+	tp->sacked_out = si->cpt_sacked_out;
+	tp->fackets_out = si->cpt_fackets_out;
+#endif
+	tp->snd_ssthresh = si->cpt_snd_ssthresh;
+	tp->snd_cwnd = si->cpt_snd_cwnd;
+	tp->snd_cwnd_cnt = si->cpt_snd_cwnd_cnt;
+	tp->snd_cwnd_clamp = si->cpt_snd_cwnd_clamp;
+	tp->snd_cwnd_used = si->cpt_snd_cwnd_used;
+	tp->snd_cwnd_stamp = tcp_jiffies_import(si->cpt_snd_cwnd_stamp);
+	tp->timeout = tcp_jiffies_import(si->cpt_timeout);
+	tp->rcv_wnd = si->cpt_rcv_wnd;
+	tp->rcv_wup = si->cpt_rcv_wup;
+	tp->write_seq = si->cpt_write_seq;
+	tp->pushed_seq = si->cpt_pushed_seq;
+	tp->copied_seq = si->cpt_copied_seq;
+	tp->rx_opt.tstamp_ok = si->cpt_tstamp_ok;
+	tp->rx_opt.wscale_ok = si->cpt_wscale_ok;
+	tp->rx_opt.sack_ok = si->cpt_sack_ok;
+	tp->rx_opt.saw_tstamp = si->cpt_saw_tstamp;
+	tp->rx_opt.snd_wscale = si->cpt_snd_wscale;
+	tp->rx_opt.rcv_wscale = si->cpt_rcv_wscale;
+	tp->nonagle = si->cpt_nonagle;
+	tp->keepalive_probes = si->cpt_keepalive_probes;
+	tp->rx_opt.rcv_tsval = si->cpt_rcv_tsval;
+	tp->rx_opt.rcv_tsecr = si->cpt_rcv_tsecr;
+	tp->rx_opt.ts_recent = si->cpt_ts_recent;
+	tp->rx_opt.ts_recent_stamp = si->cpt_ts_recent_stamp;
+	tp->rx_opt.user_mss = si->cpt_user_mss;
+	tp->rx_opt.dsack = si->cpt_dsack;
+	tp->rx_opt.eff_sacks = si->cpt_num_sacks;
+	/* SACK blocks were flattened into cpt_sack_array as
+	 * start/end sequence pairs. */
+	tp->duplicate_sack[0].start_seq = si->cpt_sack_array[0];
+	tp->duplicate_sack[0].end_seq = si->cpt_sack_array[1];
+	tp->selective_acks[0].start_seq = si->cpt_sack_array[2];
+	tp->selective_acks[0].end_seq = si->cpt_sack_array[3];
+	tp->selective_acks[1].start_seq = si->cpt_sack_array[4];
+	tp->selective_acks[1].end_seq = si->cpt_sack_array[5];
+	tp->selective_acks[2].start_seq = si->cpt_sack_array[6];
+	tp->selective_acks[2].end_seq = si->cpt_sack_array[7];
+	tp->selective_acks[3].start_seq = si->cpt_sack_array[8];
+	tp->selective_acks[3].end_seq = si->cpt_sack_array[9];
+
+	tp->window_clamp = si->cpt_window_clamp;
+	tp->rcv_ssthresh = si->cpt_rcv_ssthresh;
+	tp->probes_out = si->cpt_probes_out;
+	tp->rx_opt.num_sacks = si->cpt_num_sacks;
+	tp->advmss = si->cpt_advmss;
+	tp->syn_retries = si->cpt_syn_retries;
+	tp->ecn_flags = si->cpt_ecn_flags;
+	tp->prior_ssthresh = si->cpt_prior_ssthresh;
+	tp->high_seq = si->cpt_high_seq;
+	tp->retrans_stamp = si->cpt_retrans_stamp;
+	tp->undo_marker = si->cpt_undo_marker;
+	tp->undo_retrans = si->cpt_undo_retrans;
+	tp->urg_seq = si->cpt_urg_seq;
+	tp->urg_data = si->cpt_urg_data;
+	tp->pending = si->cpt_pending;
+	tp->urg_mode = si->cpt_urg_mode;
+	tp->snd_up = si->cpt_snd_up;
+	tp->keepalive_time = si->cpt_keepalive_time;
+	tp->keepalive_intvl = si->cpt_keepalive_intvl;
+	tp->linger2 = si->cpt_linger2;
+
+	/* Re-locate the send head: the first skb in the write queue
+	 * that has not been sent yet (seq >= snd_nxt). */
+	sk->sk_send_head = NULL;
+	for (skb = skb_peek(&sk->sk_write_queue);
+	     skb && skb != (struct sk_buff*)&sk->sk_write_queue;
+	     skb = skb->next) {
+		if (!after(tp->snd_nxt, TCP_SKB_CB(skb)->seq)) {
+			sk->sk_send_head = skb;
+			break;
+		}
+	}
+
+	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) {
+		struct inet_opt *inet = inet_sk(sk);
+		if (inet->num == 0) {
+			cpt_object_t *lobj = NULL;
+
+			/* The local port was owned via a listener:
+			 * re-inherit it, preferring the listener saved
+			 * by index, else one found by port. */
+			if ((int)si->cpt_parent != -1)
+				lobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
+
+			if (lobj && lobj->o_obj) {
+				inet->num = ntohs(inet->sport);
+				tcp_inherit_port(lobj->o_obj, sk);
+				dprintk_ctx("port inherited from parent\n");
+			} else {
+				struct sock *lsk = find_parent(inet->sport, ctx);
+				if (lsk) {
+					inet->num = ntohs(inet->sport);
+					tcp_inherit_port(lsk, sk);
+					dprintk_ctx("port inherited\n");
+				} else {
+					eprintk_ctx("we are kinda lost...\n");
+				}
+			}
+		}
+
+		sk->sk_prot->hash(sk);
+
+		/* Re-arm any pending timers with rebased expiries. */
+		if (tp->ack.pending&TCP_ACK_TIMER)
+			sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
+		if (tp->pending)
+			sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
+		if (sock_flag(sk, SOCK_KEEPOPEN)) {
+			unsigned long expires = jiffies_import(si->cpt_ka_timeout);
+			if (time_after(jiffies, expires))
+				expires = jiffies + HZ;
+			sk_reset_timer(sk, &sk->sk_timer, expires);
+		}
+	}
+
+	return 0;
+}
+
+
+/* Restore the INET-level state of @sk from the image: addresses and
+ * per-socket IP options, a pending cork (re-resolving its route),
+ * UDP- and IPv6-specific fields, queued skbs, and TCP state for
+ * stream sockets.  Runs under lock_sock().  Always returns 0.
+ */
+int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *sk,
+		  struct cpt_context *ctx)
+{
+	struct inet_opt *inet = inet_sk(sk);
+
+	lock_sock(sk);
+
+	sk->sk_state = si->cpt_state;
+
+	inet->daddr = si->cpt_daddr;
+	inet->dport = si->cpt_dport;
+	inet->saddr = si->cpt_saddr;
+	inet->rcv_saddr = si->cpt_rcv_saddr;
+	inet->sport = si->cpt_sport;
+	inet->uc_ttl = si->cpt_uc_ttl;
+	inet->tos = si->cpt_tos;
+	inet->cmsg_flags = si->cpt_cmsg_flags;
+	inet->mc_index = si->cpt_mc_index;
+	inet->mc_addr = si->cpt_mc_addr;
+	inet->hdrincl = si->cpt_hdrincl;
+	inet->mc_ttl = si->cpt_mc_ttl;
+	inet->mc_loop = si->cpt_mc_loop;
+	inet->pmtudisc = si->cpt_pmtudisc;
+	inet->recverr = si->cpt_recverr;
+	inet->freebind = si->cpt_freebind;
+	inet->id = si->cpt_idcounter;
+
+	/* Restore an in-progress cork; its cached route cannot be
+	 * checkpointed and must be re-resolved from the flow key. */
+	inet->cork.flags = si->cpt_cork_flags;
+	inet->cork.fragsize = si->cpt_cork_fragsize;
+	inet->cork.length = si->cpt_cork_length;
+	inet->cork.addr = si->cpt_cork_addr;
+	inet->cork.fl.fl4_src = si->cpt_cork_saddr;
+	inet->cork.fl.fl4_dst = si->cpt_cork_daddr;
+	inet->cork.fl.oif = si->cpt_cork_oif;
+	if (inet->cork.fragsize) {
+		if (ip_route_output_key(&inet->cork.rt, &inet->cork.fl)) {
+			eprintk_ctx("failed to restore cork route\n");
+			inet->cork.fragsize = 0;
+		}
+	}
+
+	if (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP) {
+		struct udp_opt *up = udp_sk(sk);
+		up->pending = si->cpt_udp_pending;
+		up->corkflag = si->cpt_udp_corkflag;
+		up->encap_type = si->cpt_udp_encap;
+		up->len = si->cpt_udp_len;
+	}
+
+	if (sk->sk_family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		memcpy(&np->saddr, si->cpt_saddr6, 16);
+		memcpy(&np->rcv_saddr, si->cpt_rcv_saddr6, 16);
+		memcpy(&np->daddr, si->cpt_daddr6, 16);
+		np->flow_label = si->cpt_flow_label6;
+		np->frag_size = si->cpt_frag_size6;
+		np->hop_limit = si->cpt_hop_limit6;
+		np->mcast_hops = si->cpt_mcast_hops6;
+		np->mcast_oif = si->cpt_mcast_oif6;
+		np->rxopt.all = si->cpt_rxopt6;
+		np->mc_loop = si->cpt_mc_loop6;
+		np->recverr = si->cpt_recverr6;
+		np->sndflow = si->cpt_sndflow6;
+		np->pmtudisc = si->cpt_pmtudisc6;
+		np->ipv6only = si->cpt_ipv6only6;
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		/* v4-mapped TCP-over-IPv6 socket: restore the IPv4
+		 * operation vectors. */
+		if (si->cpt_mapped) {
+			if (sk->sk_type == SOCK_STREAM &&
+			    sk->sk_protocol == IPPROTO_TCP) {
+				struct tcp_opt *tp = tcp_sk(sk);
+				tp->af_specific = &ipv6_mapped;
+				sk->sk_backlog_rcv = tcp_v4_do_rcv;
+			}
+		}
+#endif
+	}
+
+	restore_queues(sk, si, pos, ctx);
+
+	if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP)
+		rst_socket_tcp(si, pos, sk, ctx);
+
+	release_sock(sk);
+	return 0;
+}
+
+int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *ctx)
+{
+	struct open_request *req;
+
+	if (lsk->sk_state != TCP_LISTEN)
+		return -EINVAL;
+
+	req = tcp_openreq_alloc();
+	if (!req)
+		return -ENOMEM;
+
+	memset(req, 0, sizeof(*req));
+	sk->sk_socket = NULL;
+	sk->sk_sleep = NULL;
+	tcp_acceptq_queue(lsk, req, sk);
+	return 0;
+}
+
+static __inline__ u32 __tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
+{
+	return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
+}
+
+static void __tcp_v4_synq_add(struct sock *sk, struct open_request *req)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+	struct tcp_listen_opt *lopt = tp->listen_opt;
+	u32 h = __tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
+
+	req->dl_next = lopt->syn_table[h];
+
+	write_lock(&tp->syn_wait_lock);
+	lopt->syn_table[h] = req;
+	write_unlock(&tp->syn_wait_lock);
+
+	tcp_synq_added(sk);
+	if (req->retrans != 0)
+		lopt->qlen_young--;
+}
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
+{
+	u32 a, b, c;
+
+	a = raddr->s6_addr32[0];
+	b = raddr->s6_addr32[1];
+	c = raddr->s6_addr32[2];
+
+	a += JHASH_GOLDEN_RATIO;
+	b += JHASH_GOLDEN_RATIO;
+	c += rnd;
+	__jhash_mix(a, b, c);
+
+	a += raddr->s6_addr32[3];
+	b += (u32) rport;
+	__jhash_mix(a, b, c);
+
+	return c & (TCP_SYNQ_HSIZE - 1);
+}
+
+static void __tcp_v6_synq_add(struct sock *sk, struct open_request *req)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+	struct tcp_listen_opt *lopt = tp->listen_opt;
+	u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
+
+	req->dl_next = lopt->syn_table[h];
+
+	write_lock(&tp->syn_wait_lock);
+	lopt->syn_table[h] = req;
+	write_unlock(&tp->syn_wait_lock);
+
+	tcp_synq_added(sk);
+	if (req->retrans != 0)
+		lopt->qlen_young--;
+}
+#endif
+
+int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si,
+			      loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	loff_t end = si->cpt_next;
+
+	pos += si->cpt_hdrlen;
+
+	lock_sock(sk);
+	while (pos < end) {
+		struct cpt_openreq_image oi;
+
+		err = rst_get_object(CPT_OBJ_OPENREQ, pos, &oi, ctx);
+		if (err) {
+			err = rst_sock_attr(&pos, sk, ctx);
+			if (err) {
+				release_sock(sk);
+				return err;
+			}
+
+			continue;
+		}
+
+		if (oi.cpt_object == CPT_OBJ_OPENREQ) {
+			struct open_request *req = tcp_openreq_alloc();
+			if (req == NULL) {
+				release_sock(sk);
+				return -ENOMEM;
+			}
+
+			memset(req, 0, sizeof(*req));
+			req->rcv_isn = oi.cpt_rcv_isn;
+			req->snt_isn = oi.cpt_snt_isn;
+			req->rmt_port = oi.cpt_rmt_port;
+			req->mss = oi.cpt_mss;
+			req->retrans = oi.cpt_retrans;
+			req->snd_wscale = oi.cpt_snd_wscale;
+			req->rcv_wscale = oi.cpt_rcv_wscale;
+			req->tstamp_ok = oi.cpt_tstamp_ok;
+			req->sack_ok = oi.cpt_sack_ok;
+			req->wscale_ok = oi.cpt_wscale_ok;
+			req->ecn_ok = oi.cpt_ecn_ok;
+			req->acked = oi.cpt_acked;
+			req->window_clamp = oi.cpt_window_clamp;
+			req->rcv_wnd = oi.cpt_rcv_wnd;
+			req->ts_recent = oi.cpt_ts_recent;
+			req->expires = jiffies_import(oi.cpt_expires);
+
+			if (oi.cpt_family == AF_INET) {
+				memcpy(&req->af.v4_req.loc_addr, oi.cpt_loc_addr, 4);
+				memcpy(&req->af.v4_req.rmt_addr, oi.cpt_rmt_addr, 4);
+				req->class = &or_ipv4;
+				__tcp_v4_synq_add(sk, req);
+			} else {
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+				memcpy(&req->af.v6_req.loc_addr, oi.cpt_loc_addr, 16);
+				memcpy(&req->af.v6_req.rmt_addr, oi.cpt_rmt_addr, 16);
+				req->af.v6_req.iif = oi.cpt_iif;
+				req->class = &or_ipv6;
+				__tcp_v6_synq_add(sk, req);
+#endif
+			}
+		}
+		pos += oi.cpt_next;
+	}
+	release_sock(sk);
+	return 0;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_sysvipc.c linux-2.6.9-ve023stab054/kernel/cpt/rst_sysvipc.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_sysvipc.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_sysvipc.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,543 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/shm.h>
+#include <linux/msg.h>
+#include <asm/ipc.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <ub/ub_mem.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+
+struct _warg {
+		struct file		*file;
+		struct cpt_sysvshm_image	*v;
+};
+
+static int fixup_one_shm(struct shmid_kernel *shp, void *arg)
+{
+	struct _warg *warg = arg;
+
+	if (shp->shm_file != warg->file)
+		return 0;
+	if (shp->shm_nattch)
+		return -EEXIST;
+
+	shp->shm_perm.uid = warg->v->cpt_uid;
+	shp->shm_perm.gid = warg->v->cpt_gid;
+	shp->shm_perm.cuid = warg->v->cpt_cuid;
+	shp->shm_perm.cgid = warg->v->cpt_cgid;
+	shp->shm_perm.mode = warg->v->cpt_mode;
+
+	shp->shm_atim = warg->v->cpt_atime;
+	shp->shm_dtim = warg->v->cpt_dtime;
+	shp->shm_ctim = warg->v->cpt_ctime;
+	shp->shm_cprid = warg->v->cpt_creator;
+	shp->shm_lprid = warg->v->cpt_last;
+
+	/* TODO: fix shp->mlock_user? */
+	return 1;
+}
+
+static int fixup_shm(struct file *file, struct cpt_sysvshm_image *v)
+{
+	struct _warg warg;
+
+	warg.file = file;
+	warg.v = v;
+
+	return sysvipc_walk_shm(fixup_one_shm, &warg);
+}
+
+static int fixup_shm_data(struct file *file, loff_t pos, loff_t end,
+			  struct cpt_context *ctx)
+{
+	struct cpt_page_block pgb;
+	ssize_t (*do_write)(struct file *, const char __user *, size_t, loff_t *ppos);
+
+	do_write = file->f_dentry->d_inode->i_fop->write;
+	if (do_write == NULL) {
+		eprintk_ctx("No TMPFS? Cannot restore content of SYSV SHM\n");
+		return -EINVAL;
+	}
+
+	while (pos < end) {
+		loff_t opos;
+		loff_t ipos;
+		int count;
+		int err;
+
+		err = rst_get_object(CPT_OBJ_PAGES, pos, &pgb, ctx);
+		if (err)
+			return err;
+		dprintk_ctx("restoring SHM block: %08x-%08x\n",
+		       (__u32)pgb.cpt_start, (__u32)pgb.cpt_end);
+		ipos = pos + pgb.cpt_hdrlen;
+		opos = pgb.cpt_start;
+		count = pgb.cpt_end-pgb.cpt_start;
+		while (count > 0) {
+			mm_segment_t oldfs;
+			int copy = count;
+
+			if (copy > PAGE_SIZE)
+				copy = PAGE_SIZE;
+			(void)cpt_get_buf(ctx);
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			err = ctx->pread(ctx->tmpbuf, copy, ctx, ipos);
+			set_fs(oldfs);
+			if (err) {
+				__cpt_release_buf(ctx);
+				return err;
+			}
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			ipos += copy;
+			err = do_write(file, ctx->tmpbuf, copy, &opos);
+			set_fs(oldfs);
+			__cpt_release_buf(ctx);
+			if (err != copy) {
+				eprintk_ctx("write() failure\n");
+				if (err >= 0)
+					err = -EIO;
+				return err;
+			}
+			count -= copy;
+		}
+		pos += pgb.cpt_next;
+	}
+	return 0;
+}
+
+struct file * rst_sysv_shm(loff_t pos, struct cpt_context *ctx)
+{
+	struct file *file;
+	int err;
+	loff_t dpos, epos;
+	union {
+		struct cpt_file_image		fi;
+		struct cpt_sysvshm_image	shmi;
+		struct cpt_inode_image 		ii;
+	} u;
+
+	err = rst_get_object(CPT_OBJ_FILE, pos, &u.fi, ctx);
+	if (err < 0)
+		goto err_out;
+	pos = u.fi.cpt_inode;
+	err = rst_get_object(CPT_OBJ_INODE, pos, &u.ii, ctx);
+	if (err < 0)
+		goto err_out;
+	dpos = pos + u.ii.cpt_hdrlen;
+	epos = pos + u.ii.cpt_next;
+	err = rst_get_object(CPT_OBJ_SYSV_SHM, pos + u.ii.cpt_hdrlen, &u.shmi, ctx);
+	if (err < 0)
+		goto err_out;
+	dpos += u.shmi.cpt_next;
+
+	file = sysvipc_setup_shm(u.shmi.cpt_key, u.shmi.cpt_id, 
+				 u.shmi.cpt_segsz, u.shmi.cpt_mode);
+	if (!IS_ERR(file)) {
+		err = fixup_shm(file, &u.shmi);
+		if (err != -EEXIST && dpos < epos)
+			err = fixup_shm_data(file, dpos, epos, ctx);
+	}
+
+	return file;
+
+err_out:
+	return ERR_PTR(err);
+}
+
+static int attach_one_undo(int semid, struct sem_array *sma, void *arg)
+{
+	struct sem_undo *su = arg;
+	struct sem_undo_list *undo_list = current->sysvsem.undo_list;
+
+	if (semid != su->semid)
+		return 0;
+
+	su->proc_next = undo_list->proc_list;
+	undo_list->proc_list = su;
+
+	su->id_next = sma->undo;
+	sma->undo = su;
+
+	return 1;
+}
+
+static int attach_undo(struct sem_undo *su)
+{
+	return sysvipc_walk_sem(attach_one_undo, su);
+}
+
+static int do_rst_semundo(struct cpt_object_hdr *sui, loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	struct sem_undo_list *undo_list;
+
+	if (current->sysvsem.undo_list) {
+		eprintk_ctx("Funny undo_list\n");
+		return 0;
+	}
+
+	undo_list = ub_kmalloc(sizeof(struct sem_undo_list), GFP_KERNEL);
+	if (undo_list == NULL)
+		return -ENOMEM;
+	memset(undo_list, 0, sizeof(struct sem_undo_list));
+	atomic_set(&undo_list->refcnt, 1);
+	spin_lock_init(&undo_list->lock);
+	current->sysvsem.undo_list = undo_list;
+
+	if (sui->cpt_next > sui->cpt_hdrlen) {
+		loff_t offset = pos + sui->cpt_hdrlen;
+		do {
+			struct sem_undo *new;
+			struct cpt_sysvsem_undo_image spi;
+			err = rst_get_object(CPT_OBJ_SYSVSEM_UNDO_REC, offset, &spi, ctx);
+			if (err)
+				goto out;
+			new = ub_kmalloc(sizeof(struct sem_undo) +
+					 sizeof(short)*spi.cpt_nsem, GFP_KERNEL);
+			if (!new) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			memset(new, 0, sizeof(struct sem_undo) + sizeof(short)*spi.cpt_nsem);
+			new->semadj = (short *) &new[1];
+			new->semid = spi.cpt_id;
+			err = ctx->pread(new->semadj, spi.cpt_nsem*sizeof(short), ctx, offset + spi.cpt_hdrlen);
+			if (err) {
+				kfree(new);
+				goto out;
+			}
+			err = attach_undo(new);
+			if (err <= 0) {
+				if (err == 0)
+					err = -ENOENT;
+				kfree(new);
+				goto out;
+			}
+			offset += spi.cpt_next;
+		} while (offset < pos + sui->cpt_next);
+	}
+	err = 0;
+
+out:
+	return err;
+}
+
+__u32 rst_semundo_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	__u32 flag = 0;
+
+#if 0
+	if (ti->cpt_sysvsem_undo == CPT_NULL ||
+	    lookup_cpt_obj_bypos(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo))
+		flag |= CLONE_SYSVSEM;
+#endif
+	return flag;
+}
+
+int rst_semundo_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	int err;
+	struct sem_undo_list *f = current->sysvsem.undo_list;
+	cpt_object_t *obj;
+	struct cpt_object_hdr sui;
+
+	if (ti->cpt_sysvsem_undo == CPT_NULL) {
+		exit_sem(current);
+		return 0;
+	}
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo, ctx);
+	if (obj) {
+		if (obj->o_obj != f) {
+			exit_sem(current);
+			f = obj->o_obj;
+			atomic_inc(&f->refcnt);
+			current->sysvsem.undo_list = f;
+		}
+		return 0;
+	}
+
+	if ((err = rst_get_object(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo, &sui, ctx)) != 0)
+		goto out;
+
+	if ((err = do_rst_semundo(&sui, ti->cpt_sysvsem_undo, ctx)) != 0)
+		goto out;
+
+	err = -ENOMEM;
+	obj = cpt_object_add(CPT_OBJ_SYSVSEM_UNDO, f, ctx);
+	if (obj) {
+		err = 0;
+		cpt_obj_setpos(obj, ti->cpt_sysvsem_undo, ctx);
+	}
+
+	return err;
+
+out:
+	return err;
+}
+
+struct _sarg {
+	int semid;
+	struct cpt_sysvsem_image	*v;
+	__u32				*arr;
+};
+
+static int fixup_one_sem(int semid, struct sem_array *sma, void *arg)
+{
+	struct _sarg *warg = arg;
+
+	if (semid != warg->semid)
+		return 0;
+
+	sma->sem_perm.uid = warg->v->cpt_uid;
+	sma->sem_perm.gid = warg->v->cpt_gid;
+	sma->sem_perm.cuid = warg->v->cpt_cuid;
+	sma->sem_perm.cgid = warg->v->cpt_cgid;
+	sma->sem_perm.mode = warg->v->cpt_mode;
+	sma->sem_perm.seq = warg->v->cpt_seq;
+
+	sma->sem_ctime = warg->v->cpt_ctime;
+	sma->sem_otime = warg->v->cpt_otime;
+	memcpy(sma->sem_base, warg->arr, sma->sem_nsems*8);
+	return 1;
+}
+
+static int fixup_sem(int semid, struct cpt_sysvsem_image *v, __u32 *arr)
+{
+	struct _sarg warg;
+
+	warg.semid = semid;
+	warg.v = v;
+	warg.arr = arr;
+
+	return sysvipc_walk_sem(fixup_one_sem, &warg);
+}
+
+
+static int restore_sem(loff_t pos, struct cpt_sysvsem_image *si,
+		       struct cpt_context *ctx)
+{
+	int err;
+	__u32 *arr;
+	int nsems = (si->cpt_next - si->cpt_hdrlen)/8;
+
+	arr = kmalloc(nsems*8, GFP_KERNEL);
+	if (!arr)
+		return -ENOMEM;
+
+	err = ctx->pread(arr, nsems*8, ctx, pos+si->cpt_hdrlen);
+	if (err)
+		goto out;
+	err = sysvipc_setup_sem(si->cpt_key, si->cpt_id, nsems, si->cpt_mode);
+	if (err < 0) {
+		eprintk_ctx("SEM 3\n");
+		goto out;
+	}
+	err = fixup_sem(si->cpt_id, si, arr);
+	if (err == 0)
+		err = -ESRCH;
+	if (err > 0)
+		err = 0;
+out:
+	kfree(arr);
+	return err;
+}
+
+static int rst_sysv_sem(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_SYSV_SEM];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_sysvsem_image sbuf;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_SYSV_SEM || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int err;
+		err = rst_get_object(CPT_OBJ_SYSV_SEM, sec, &sbuf, ctx);
+		if (err)
+			return err;
+		err = restore_sem(sec, &sbuf, ctx);
+		if (err)
+			return err;
+		sec += sbuf.cpt_next;
+	}
+	return 0;
+}
+
+struct _marg {
+	int				msqid;
+	struct cpt_sysvmsg_image	*v;
+	struct msg_queue		*m;
+};
+
+static int fixup_one_msg(int msqid, struct msg_queue *msq, void *arg)
+{
+	struct _marg *warg = arg;
+
+	if (msqid != warg->msqid)
+		return 0;
+
+	msq->q_perm.uid = warg->v->cpt_uid;
+	msq->q_perm.gid = warg->v->cpt_gid;
+	msq->q_perm.cuid = warg->v->cpt_cuid;
+	msq->q_perm.cgid = warg->v->cpt_cgid;
+	msq->q_perm.mode = warg->v->cpt_mode;
+	msq->q_perm.seq = warg->v->cpt_seq;
+
+	msq->q_stime = warg->v->cpt_stime;
+	msq->q_rtime = warg->v->cpt_rtime;
+	msq->q_ctime = warg->v->cpt_ctime;
+	msq->q_lspid = warg->v->cpt_last_sender;
+	msq->q_lrpid = warg->v->cpt_last_receiver;
+	msq->q_qbytes = warg->v->cpt_qbytes;
+
+	warg->m = msq;
+	return 1;
+}
+
+struct _larg
+{
+	cpt_context_t * ctx;
+	loff_t		pos;
+};
+
+static int do_load_msg(void * dst, int len, int offset, void * data)
+{
+	struct _larg * arg = data;
+	return arg->ctx->pread(dst, len, arg->ctx, arg->pos + offset);
+}
+
+static int fixup_msg(int msqid, struct cpt_sysvmsg_image *v, loff_t pos,
+		     cpt_context_t * ctx)
+{
+	int err;
+	struct _marg warg;
+	struct ve_struct *env;
+	loff_t endpos = pos + v->cpt_next;
+
+	pos += v->cpt_hdrlen;
+	env = get_exec_env();
+
+	warg.msqid = msqid;
+	warg.v = v;
+
+	err = sysvipc_walk_msg(fixup_one_msg, &warg);
+	if (err <= 0)
+		return err;
+
+	while (pos < endpos) {
+		struct cpt_sysvmsg_msg_image mi;
+		struct msg_msg *m;
+		struct _larg data = {
+			.ctx = ctx
+		};
+
+		err = rst_get_object(CPT_OBJ_SYSVMSG_MSG, pos, &mi, ctx);
+		if (err)
+			return err;
+		data.pos = pos + mi.cpt_hdrlen;
+		m = sysv_msg_load(do_load_msg, mi.cpt_size, &data);
+		if (IS_ERR(m))
+			return PTR_ERR(m);
+		m->m_type = mi.cpt_type;
+		m->m_ts = mi.cpt_size;
+		list_add_tail(&m->m_list, &warg.m->q_messages);
+		warg.m->q_cbytes += m->m_ts;
+		warg.m->q_qnum++;
+		atomic_add(m->m_ts, &(env->_msg_bytes));
+		atomic_inc(&env->_msg_hdrs);
+
+		pos += mi.cpt_next;
+	}
+	return 1;
+}
+
+static int restore_msg(loff_t pos, struct cpt_sysvmsg_image *si,
+		       struct cpt_context *ctx)
+{
+	int err;
+
+	err = sysvipc_setup_msg(si->cpt_key, si->cpt_id, si->cpt_mode);
+	if (err < 0) {
+		eprintk_ctx("MSG 3\n");
+		goto out;
+	}
+	err = fixup_msg(si->cpt_id, si, pos, ctx);
+	if (err == 0)
+		err = -ESRCH;
+	if (err > 0)
+		err = 0;
+out:
+	return err;
+}
+
+static int rst_sysv_msg(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_SYSV_MSG];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_sysvmsg_image sbuf;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_SYSV_MSG || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int err;
+		err = rst_get_object(CPT_OBJ_SYSVMSG, sec, &sbuf, ctx);
+		if (err)
+			return err;
+		err = restore_msg(sec, &sbuf, ctx);
+		if (err)
+			return err;
+		sec += sbuf.cpt_next;
+	}
+	return 0;
+}
+
+int rst_sysv_ipc(struct cpt_context *ctx)
+{
+	int err;
+
+	err = rst_sysv_sem(ctx);
+	if (!err)
+		err = rst_sysv_msg(ctx);
+
+	return err;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_tty.c linux-2.6.9-ve023stab054/kernel/cpt/rst_tty.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_tty.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_tty.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,369 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/mount.h>
+#include <linux/tty.h>
+#include <linux/vmalloc.h>
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+
+static int pty_setup(struct tty_struct *stty, loff_t pos,
+		     struct cpt_tty_image *pi, struct cpt_context *ctx)
+{
+	unsigned long flags;
+
+	stty->pgrp = -1;
+	stty->session = 0;
+	stty->packet = pi->cpt_packet;
+	stty->stopped = pi->cpt_stopped;
+	stty->hw_stopped = pi->cpt_hw_stopped;
+	stty->flow_stopped = pi->cpt_flow_stopped;
+#define DONOT_CHANGE ((1<<TTY_CHARGED)|(1<<TTY_CLOSING)|(1<<TTY_LDISC))
+	flags = stty->flags & DONOT_CHANGE;
+	stty->flags = flags | (pi->cpt_flags & ~DONOT_CHANGE);
+	stty->ctrl_status = pi->cpt_ctrl_status;
+	stty->winsize.ws_row = pi->cpt_ws_row;
+	stty->winsize.ws_col = pi->cpt_ws_col;
+	stty->winsize.ws_ypixel = pi->cpt_ws_prow;
+	stty->winsize.ws_xpixel = pi->cpt_ws_pcol;
+	stty->canon_column = pi->cpt_canon_column;
+	stty->column = pi->cpt_column;
+	stty->raw = pi->cpt_raw;
+	stty->real_raw = pi->cpt_real_raw;
+	stty->erasing = pi->cpt_erasing;
+	stty->lnext = pi->cpt_lnext;
+	stty->icanon = pi->cpt_icanon;
+	stty->closing = pi->cpt_closing;
+	stty->minimum_to_wake = pi->cpt_minimum_to_wake;
+
+	stty->termios->c_iflag = pi->cpt_c_iflag;
+	stty->termios->c_oflag = pi->cpt_c_oflag;
+	stty->termios->c_lflag = pi->cpt_c_lflag;
+	stty->termios->c_cflag = pi->cpt_c_cflag;
+	memcpy(&stty->termios->c_cc, &pi->cpt_c_cc, NCCS);
+	memcpy(stty->read_flags, pi->cpt_read_flags, sizeof(stty->read_flags));
+
+	if (pi->cpt_next > pi->cpt_hdrlen) {
+		int err;
+		struct cpt_obj_bits b;
+		err = rst_get_object(CPT_OBJ_BITS, pos + pi->cpt_hdrlen, &b, ctx);
+		if (err)
+			return err;
+		if (b.cpt_size == 0)
+			return 0;
+		err = ctx->pread(stty->read_buf, b.cpt_size, ctx, pos + pi->cpt_hdrlen + b.cpt_hdrlen);
+		if (err)
+			return err;
+
+		spin_lock_irq(&stty->read_lock);
+		stty->read_tail = 0;
+		stty->read_cnt = b.cpt_size;
+		stty->read_head = b.cpt_size;
+		stty->canon_head = stty->read_tail + pi->cpt_canon_head;
+		stty->canon_data = pi->cpt_canon_data;
+		spin_unlock_irq(&stty->read_lock);
+	}
+
+	return 0;
+}
+
+/* Find slave/master tty in image, when we already know master/slave.
+ * It might be optimized, of course. */
+static loff_t find_pty_pair(struct tty_struct *stty, loff_t pos, struct cpt_tty_image *pi, struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_TTY];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_tty_image *pibuf;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return CPT_NULL;
+	if (h.cpt_section != CPT_SECT_TTY || h.cpt_hdrlen < sizeof(h))
+		return CPT_NULL;
+	pibuf = kmalloc(sizeof(*pibuf), GFP_KERNEL);
+	if (pibuf == NULL) {
+		eprintk_ctx("cannot allocate buffer\n");
+		return CPT_NULL;
+	}
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		if (rst_get_object(CPT_OBJ_TTY, sec, pibuf, ctx))
+			break;
+		if (pibuf->cpt_index == pi->cpt_index &&
+		    !((pi->cpt_drv_flags^pibuf->cpt_drv_flags)&TTY_DRIVER_DEVPTS_MEM) &&
+		    pos != sec) {
+			pty_setup(stty, sec, pibuf, ctx);
+			kfree(pibuf); return sec;
+		}
+		sec += pibuf->cpt_next;
+	}
+	kfree(pibuf);
+	return CPT_NULL;
+}
+
+static int fixup_tty_attrs(struct cpt_inode_image *ii, struct file *master,
+			   struct cpt_context *ctx)
+{
+	int err;
+	struct iattr newattrs;
+	struct dentry *d = master->f_dentry;
+
+	newattrs.ia_valid = ATTR_UID|ATTR_GID|ATTR_MODE;
+	newattrs.ia_uid = ii->cpt_uid;
+	newattrs.ia_gid = ii->cpt_gid;
+	newattrs.ia_mode = ii->cpt_mode;
+
+	down(&d->d_inode->i_sem);
+	err = notify_change(d, &newattrs);
+	up(&d->d_inode->i_sem);
+
+	return err;
+}
+
+/* NOTE: "portable", but ugly thing. To allocate /dev/pts/N, we open
+ * /dev/ptmx until we get pty with desired index.
+ */
+
+struct file *ptmx_open(int index, unsigned int flags)
+{
+	struct file *file;
+	struct file **stack = NULL;
+	int depth = 0;
+
+	for (;;) {
+		struct tty_struct *tty;
+
+		file = filp_open("/dev/ptmx", flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
+		if (IS_ERR(file))
+			break;
+		tty = file->private_data;
+		if (tty->index == index)
+			break;
+
+		if (depth == PAGE_SIZE/sizeof(struct file *)) {
+			fput(file);
+			file = ERR_PTR(-EBUSY);
+			break;
+		}
+		if (stack == NULL) {
+			stack = (struct file **)__get_free_page(GFP_KERNEL);
+			if (!stack) {
+				fput(file);
+				file = ERR_PTR(-ENOMEM);
+				break;
+			}
+		}
+		stack[depth] = file;
+		depth++;
+	}
+	while (depth > 0) {
+		depth--;
+		fput(stack[depth]);
+	}
+	if (stack)
+		free_page((unsigned long)stack);
+	return file;
+}
+
+
+struct file * rst_open_tty(struct cpt_file_image *fi, struct cpt_inode_image *ii,
+			   unsigned flags, struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	struct file *master, *slave;
+	struct tty_struct *stty;
+	struct cpt_tty_image *pi;
+	static char *a = "pqrstuvwxyzabcde";
+	static char *b = "0123456789abcdef";
+	char pairname[16];
+	unsigned master_flags, slave_flags;
+
+	if (fi->cpt_priv == CPT_NULL)
+		return ERR_PTR(-EINVAL);
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, fi->cpt_priv, ctx);
+	if (obj && obj->o_parent) {
+		dprintk_ctx("obtained pty as pair to existing\n");
+		master = obj->o_parent;
+		stty = master->private_data;
+
+		if (stty->driver->subtype == PTY_TYPE_MASTER &&
+		    (stty->driver->flags&TTY_DRIVER_DEVPTS_MEM)) {
+			wprintk_ctx("cloning ptmx\n");
+			get_file(master);
+			return master;
+		}
+
+		master = dentry_open(dget(master->f_dentry),
+				     mntget(master->f_vfsmnt), flags);
+		if (!IS_ERR(master)) {
+			stty = master->private_data;
+			if (stty->driver->subtype != PTY_TYPE_MASTER)
+				fixup_tty_attrs(ii, master, ctx);
+		}
+		return master;
+	}
+
+	pi = cpt_get_buf(ctx);
+	err = rst_get_object(CPT_OBJ_TTY, fi->cpt_priv, pi, ctx);
+	if (err) {
+		cpt_release_buf(ctx);
+		return ERR_PTR(err);
+	}
+
+	master_flags = slave_flags = 0;
+	if (pi->cpt_drv_subtype == PTY_TYPE_MASTER)
+		master_flags = flags;
+	else
+		slave_flags = flags;
+
+	/*
+	 * Open pair master/slave.
+	 */
+	if (pi->cpt_drv_flags&TTY_DRIVER_DEVPTS_MEM) {
+		master = ptmx_open(pi->cpt_index, master_flags);
+	} else {
+		sprintf(pairname, "/dev/pty%c%c", a[pi->cpt_index/16], b[pi->cpt_index%16]);
+		master = filp_open(pairname, master_flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
+	}
+	if (IS_ERR(master)) {
+		eprintk_ctx("filp_open master: %Ld %ld\n", fi->cpt_priv, PTR_ERR(master));
+		cpt_release_buf(ctx);
+		return master;
+	}
+	stty = master->private_data;
+	clear_bit(TTY_PTY_LOCK, &stty->flags);
+	if (pi->cpt_drv_flags&TTY_DRIVER_DEVPTS_MEM)
+		sprintf(pairname, "/dev/pts/%d", stty->index);
+	else
+		sprintf(pairname, "/dev/tty%c%c", a[stty->index/16], b[stty->index%16]);
+	slave = filp_open(pairname, slave_flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
+	if (IS_ERR(slave)) {
+		eprintk_ctx("filp_open slave %s: %ld\n", pairname, PTR_ERR(slave));
+		fput(master);
+		cpt_release_buf(ctx);
+		return slave;
+	}
+
+	if (pi->cpt_drv_subtype != PTY_TYPE_MASTER)
+		fixup_tty_attrs(ii, slave, ctx);
+
+	cpt_object_add(CPT_OBJ_TTY, master->private_data, ctx);
+	cpt_object_add(CPT_OBJ_TTY, slave->private_data, ctx);
+	cpt_object_add(CPT_OBJ_FILE, master, ctx);
+	cpt_object_add(CPT_OBJ_FILE, slave, ctx);
+
+	if (pi->cpt_drv_subtype == PTY_TYPE_MASTER) {
+		loff_t pos;
+		obj = lookup_cpt_object(CPT_OBJ_TTY, master->private_data, ctx);
+		obj->o_parent = master;
+		cpt_obj_setpos(obj, fi->cpt_priv, ctx);
+		pty_setup(stty, fi->cpt_priv, pi, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_TTY, slave->private_data, ctx);
+		obj->o_parent = slave;
+		pos = find_pty_pair(stty->link, fi->cpt_priv, pi, ctx);
+		cpt_obj_setpos(obj, pos, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_FILE, slave, ctx);
+		cpt_obj_setpos(obj, CPT_NULL, ctx);
+		get_file(master);
+		cpt_release_buf(ctx);
+		return master;
+	} else {
+		loff_t pos;
+		obj = lookup_cpt_object(CPT_OBJ_TTY, slave->private_data, ctx);
+		obj->o_parent = slave;
+		cpt_obj_setpos(obj, fi->cpt_priv, ctx);
+		pty_setup(stty->link, fi->cpt_priv, pi, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_TTY, master->private_data, ctx);
+		obj->o_parent = master;
+		pos = find_pty_pair(stty, fi->cpt_priv, pi, ctx);
+		cpt_obj_setpos(obj, pos, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_FILE, master, ctx);
+		cpt_obj_setpos(obj, CPT_NULL, ctx);
+		get_file(slave);
+		cpt_release_buf(ctx);
+		return slave;
+	}
+}
+
+int rst_tty_jobcontrol(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_TTY];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	if (sec == CPT_NULL) return 0;
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_TTY || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		cpt_object_t *obj;
+		struct cpt_tty_image *pibuf = cpt_get_buf(ctx);
+
+		if (rst_get_object(CPT_OBJ_TTY, sec, pibuf, ctx)) {
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+
+		obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, sec, ctx);
+		if (obj) {
+			struct tty_struct *stty = obj->o_obj;
+			if ((int)pibuf->cpt_pgrp > 0) {
+				stty->pgrp = vpid_to_pid(pibuf->cpt_pgrp);
+				if (stty->pgrp == -1)
+					dprintk_ctx("unknown tty pgrp %d\n", pibuf->cpt_pgrp);
+			} else if (pibuf->cpt_pgrp) {
+				stty->pgrp = alloc_pidmap();
+				if (stty->pgrp < 0) {
+					eprintk_ctx("cannot allocate stray tty->pgrp");
+					cpt_release_buf(ctx);
+					return -EINVAL;
+				}
+				free_pidmap(stty->pgrp);
+			}
+			if ((int)pibuf->cpt_session > 0) {
+				int sess;
+				sess = vpid_to_pid(pibuf->cpt_session);
+				if (sess == -1) {
+					dprintk_ctx("unknown tty session %d\n", pibuf->cpt_session);
+				} else if (stty->session <= 0) {
+					stty->session = sess;
+				} else if (stty->session != sess) {
+					wprintk_ctx("tty session mismatch 2\n");
+				}
+			}
+		}
+		sec += pibuf->cpt_next;
+		cpt_release_buf(ctx);
+	}
+	return 0;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_ubc.c linux-2.6.9-ve023stab054/kernel/cpt/rst_ubc.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_ubc.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_ubc.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,119 @@
+#include <linux/types.h>
+#include <ub/beancounter.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+struct user_beancounter *rst_lookup_ubc(__u64 pos, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_UBC, pos, ctx);
+	if (obj == NULL) {
+		printk(KERN_ERR "RST: unknown ub @%Lu\n", pos);
+		return get_beancounter(get_exec_ub());
+	}
+	return get_beancounter(obj->o_obj);
+}
+
+void copy_one_ubparm(struct ubparm *from, struct ubparm *to, int bc_parm_id)
+{
+	to[bc_parm_id].barrier = from[bc_parm_id].barrier;
+	to[bc_parm_id].limit = from[bc_parm_id].limit;
+}
+
+void set_one_ubparm_to_max(struct ubparm *ubprm, int bc_parm_id)
+{
+	ubprm[bc_parm_id].barrier = UB_MAXVALUE;
+	ubprm[bc_parm_id].limit = UB_MAXVALUE;
+}
+
+static void restore_one_bc_parm(struct cpt_ubparm *dmp, struct ubparm *prm,
+		int held)
+{
+	prm->barrier = (dmp->barrier == CPT_NULL ? UB_MAXVALUE : dmp->barrier);
+	prm->limit = (dmp->limit == CPT_NULL ? UB_MAXVALUE : dmp->limit);
+	if (held)
+		prm->held = dmp->held;
+	prm->maxheld = dmp->maxheld;
+	prm->minheld = dmp->minheld;
+	prm->failcnt = dmp->failcnt;
+}
+
+static int restore_one_bc(struct cpt_beancounter_image *v,
+		cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct user_beancounter *bc;
+	cpt_object_t *pobj;
+	int i;
+
+	if (v->cpt_parent != CPT_NULL) {
+		pobj = lookup_cpt_obj_bypos(CPT_OBJ_UBC, v->cpt_parent, ctx);
+		if (pobj == NULL)
+			return -ESRCH;
+		bc = get_subbeancounter_byid(pobj->o_obj, v->cpt_id, 1);
+	} else {
+		bc = get_exec_ub();
+		while (bc->parent)
+			bc = bc->parent;
+		get_beancounter(bc);
+	}
+	if (bc == NULL)
+		return -ENOMEM;
+	obj->o_obj = bc;
+
+	if (ctx->image_version < CPT_VERSION_9_1)
+		goto out;
+
+	for (i = 0; i < UB_RESOURCES; i++) {
+		restore_one_bc_parm(v->cpt_parms + i * 2, bc->ub_parms + i, 0);
+		restore_one_bc_parm(v->cpt_parms + i * 2 + 1,
+				bc->ub_store + i, 1);
+	}
+
+out:
+	if (!bc->parent)
+		for (i = 0; i < UB_RESOURCES; i++)
+			copy_one_ubparm(bc->ub_parms, ctx->saved_ubc, i);
+
+	return 0;
+}
+
+int rst_undump_ubc(struct cpt_context *ctx)
+{
+	loff_t start, end;
+	struct cpt_beancounter_image *v;
+	cpt_object_t *obj;
+	int err;
+
+	err = rst_get_section(CPT_SECT_UBC, ctx, &start, &end);
+	if (err)
+		return err;
+
+	while (start < end) {
+		v = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_UBC, start, v, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+
+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
+		if (obj == NULL) {
+			cpt_release_buf(ctx); return -ENOMEM; }
+		cpt_obj_setpos(obj, start, ctx);
+		intern_cpt_object(CPT_OBJ_UBC, obj, ctx);
+		restore_one_bc(v, obj, ctx);
+		cpt_release_buf(ctx);
+		start += v->cpt_next;
+	}
+	return 0;
+}
+
+void rst_finish_ubc(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_UBC)
+		put_beancounter(obj->o_obj);
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_undump.c linux-2.6.9-ve023stab054/kernel/cpt/rst_undump.c
--- linux-2.6.9-100.orig/kernel/cpt/rst_undump.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_undump.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,894 @@
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/namespace.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/smp_lock.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+#include <linux/vzcalluser.h>
+#include <ub/beancounter.h>
+#include <asm/desc.h>
+#include <asm/unistd.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_process.h"
+#include "cpt_socket.h"
+#include "cpt_net.h"
+#include "cpt_ubc.h"
+#include "cpt_kernel.h"
+
+static int rst_utsname(cpt_context_t *ctx);
+
+extern FASTCALL(void recalc_sigpending_tsk(struct task_struct *t));
+
+struct thr_context {
+	struct completion init_complete;
+	struct completion task_done;
+	int error;
+	struct cpt_context *ctx;
+	cpt_object_t	*tobj;
+};
+
+static int rst_clone_children(cpt_object_t *obj, struct cpt_context *ctx);
+
+void __put_namespace(struct namespace *namespace)
+{
+	eprintk("orphan namespace is lost\n");
+}
+
+static int vps_rst_veinfo(struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_veinfo_image *i;
+	struct ve_struct *ve;
+	struct timespec delta;
+	loff_t start, end;
+
+	err = rst_get_section(CPT_SECT_VEINFO, ctx, &start, &end);
+	if (err)
+		goto out;
+
+	i = cpt_get_buf(ctx);
+	err = rst_get_object(CPT_OBJ_VEINFO, start, i, ctx);
+	if (err)
+		goto out_rel;
+
+	ve = get_exec_env();
+	ve->_shm_ctlall = i->shm_ctl_all;
+	ve->_shm_ctlmax = i->shm_ctl_max;
+	ve->_shm_ctlmni = i->shm_ctl_mni;
+
+	ve->_msg_ctlmax = i->msg_ctl_max;
+	ve->_msg_ctlmni = i->msg_ctl_mni;
+	ve->_msg_ctlmnb = i->msg_ctl_mnb;
+
+	BUG_ON(sizeof(ve->_sem_ctls) != sizeof(i->sem_ctl_arr));
+	ve->_sem_ctls[0] = i->sem_ctl_arr[0];
+	ve->_sem_ctls[1] = i->sem_ctl_arr[1];
+	ve->_sem_ctls[2] = i->sem_ctl_arr[2];
+	ve->_sem_ctls[3] = i->sem_ctl_arr[3];
+
+	cpt_timespec_import(&delta, i->start_timespec_delta);
+	set_normalized_timespec(&ve->start_timespec,
+			ve->start_timespec.tv_sec - delta.tv_sec,
+			ve->start_timespec.tv_nsec - delta.tv_nsec);
+	ve->start_jiffies -= i->start_jiffies_delta;
+	ve->start_cycles -= (s64)i->start_jiffies_delta * cycles_per_jiffy;
+
+	err = 0;
+out_rel:
+	cpt_release_buf(ctx);
+out:
+	return err;
+}
+
+static int vps_rst_reparent_root(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	int err;
+	struct env_create_param2 param;
+
+	ctx->cpt_jiffies64 = get_jiffies_64();
+	do_gettimespec(&ctx->delta_time);
+
+	set_normalized_timespec(&ctx->delta_time,
+				ctx->delta_time.tv_sec - ctx->start_time.tv_sec,
+				ctx->delta_time.tv_nsec - ctx->start_time.tv_nsec);
+	ctx->delta_nsec = (s64)ctx->delta_time.tv_sec*NSEC_PER_SEC + ctx->delta_time.tv_nsec;
+	if (ctx->delta_nsec < 0) {
+		wprintk_ctx("Wall time is behind source by %Ld ns, "
+			    "time sensitive applications can misbehave\n", -ctx->delta_nsec);
+	}
+
+	memset(&param, 0, sizeof(param));
+	param.iptables_mask = ctx->iptables_mask;
+	param.feature_mask = ctx->features;
+
+	err = real_env_create(ctx->ve_id, VE_CREATE|VE_LOCK, 2, &param, sizeof(param));
+	if (err < 0)
+		eprintk_ctx("real_env_create: %d\n", err);
+	get_exec_env()->jiffies_fixup =
+		(ctx->delta_time.tv_sec < 0 ?
+		 0 : timespec_to_jiffies(&ctx->delta_time)) -
+		(unsigned long)(ctx->cpt_jiffies64 - ctx->virt_jiffies64);
+	return err < 0 ? err : 0;
+}
+
+
+static int hook(void *arg)
+{
+	struct thr_context *thr_ctx = arg;
+	struct cpt_context *ctx;
+	cpt_object_t *tobj;
+	struct cpt_task_image *ti;
+	int err = 0;
+	int exiting = 0;
+
+	current->state = TASK_UNINTERRUPTIBLE;
+	complete(&thr_ctx->init_complete);
+	schedule();
+
+	ctx = thr_ctx->ctx;
+	tobj = thr_ctx->tobj;
+	ti = tobj->o_image;
+
+	current->fs->umask = 0;
+
+	if (ti->cpt_pid == 1) {
+#ifdef CONFIG_USER_RESOURCE
+		struct user_beancounter *bc;
+#endif
+
+		err = vps_rst_reparent_root(tobj, ctx);
+
+		if (err) {
+			rst_report_error(err, ctx);
+			goto out;
+		}
+
+		memcpy(&get_exec_env()->cap_default, &ti->cpt_ecap, sizeof(kernel_cap_t));
+
+		if (ctx->statusfile) {
+			fput(ctx->statusfile);
+			ctx->statusfile = NULL;
+		}
+
+		if (ctx->lockfile) {
+			mm_segment_t oldfs;
+			err = -EINVAL;
+			char b;
+
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			if (ctx->lockfile->f_op && ctx->lockfile->f_op->read)
+				err = ctx->lockfile->f_op->read(ctx->lockfile, &b, 1, &ctx->lockfile->f_pos);
+			set_fs(oldfs);
+			fput(ctx->lockfile);
+			ctx->lockfile = NULL;
+		}
+
+		if (err) {
+			eprintk_ctx("CPT: lock fd is closed incorrectly: %d\n", err);
+			goto out;
+		}
+		err = vps_rst_veinfo(ctx);
+		if (err) {
+			eprintk_ctx("rst_veinfo: %d\n", err);
+			goto out;
+		}
+
+		err = rst_utsname(ctx);
+		if (err) {
+			eprintk_ctx("rst_utsname: %d\n", err);
+			goto out;
+		}
+
+		err = rst_root_namespace(ctx);
+		if (err) {
+			eprintk_ctx("rst_namespace: %d\n", err);
+			goto out;
+		}
+
+		if ((err = rst_restore_net(ctx)) != 0) {
+			eprintk_ctx("rst_restore_net: %d\n", err);
+			goto out;
+		}
+
+		err = rst_sockets(ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: %d\n", err);
+			goto out;
+		}
+		err = rst_sysv_ipc(ctx);
+		if (err) {
+			eprintk_ctx("rst_sysv_ipc: %d\n", err);
+			goto out;
+		}
+#ifdef CONFIG_USER_RESOURCE
+		bc = get_exec_ub();
+		set_one_ubparm_to_max(bc->ub_parms, UB_KMEMSIZE);
+		set_one_ubparm_to_max(bc->ub_parms, UB_NUMPROC);
+		set_one_ubparm_to_max(bc->ub_parms, UB_NUMFILE);
+		set_one_ubparm_to_max(bc->ub_parms, UB_DCACHESIZE);
+#endif
+	}
+
+	do {
+		if (current->user->uid != ti->cpt_user) {
+			struct user_struct *u = alloc_uid(ti->cpt_user);
+			if (!u) {
+				eprintk_ctx("alloc_user\n");
+			} else {
+				switch_uid(u);
+			}
+		}
+	} while (0);
+
+	if ((err = rst_mm_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_mm: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_files_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_files: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_fs_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_fs: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_semundo_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_semundo: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_signal_complete(ti, &exiting, ctx)) != 0) {
+		eprintk_ctx("rst_signal: %d\n", err);
+		goto out;
+	}
+
+	if (ti->cpt_namespace == CPT_NULL)
+		exit_namespace(current);
+
+	if (ti->cpt_personality != 0)
+		__set_personality(ti->cpt_personality);
+
+	current->set_child_tid = NULL;
+	current->clear_child_tid = NULL;
+	current->flags &= ~(PF_FORKNOEXEC|PF_SUPERPRIV);
+	current->flags |= ti->cpt_flags&(PF_FORKNOEXEC|PF_SUPERPRIV);
+	current->exit_code = ti->cpt_exit_code;
+	current->pdeath_signal = ti->cpt_pdeath_signal;
+
+	if (ti->cpt_restart.fn != CPT_RBL_0) {
+		if (ti->cpt_restart.fn != CPT_RBL_NANOSLEEP
+		    && ti->cpt_restart.fn != CPT_RBL_COMPAT_NANOSLEEP
+		    ) {
+			eprintk_ctx("unknown restart block\n");
+		} else {
+			s64 val;
+
+			current->thread_info->restart_block.fn = nanosleep_restart;
+#ifdef CONFIG_X86_64
+			if (!ti->cpt_64bit)
+				current->thread_info->restart_block.fn = compat_nanosleep_restart;
+#endif
+			val = ti->cpt_restart.arg0;
+			if (ctx->image_version < CPT_VERSION_9)
+				val *= TICK_NSEC;
+			val -= ctx->delta_nsec;
+			if (val <= 0)
+				val = TICK_NSEC;
+			val = _ns_to_jiffies(val + TICK_NSEC - 1);
+			current->thread_info->restart_block.arg0 = val + 
+					(unsigned long)ctx->cpt_jiffies64;
+			if (ctx->image_version < CPT_VERSION_9)
+				current->thread_info->restart_block.arg1 = ti->cpt_restart.arg1;
+			else
+				current->thread_info->restart_block.arg1 = ti->cpt_restart.arg2;
+			current->thread_info->restart_block.arg2 = 0;
+			current->thread_info->restart_block.arg3 = 0;
+			dprintk_ctx("rbl " CPT_FID " +%Ld %lu %lu %lu %lu %lu\n", CPT_TID(current),
+				val, (unsigned long)current->thread_info->restart_block.arg0, jiffies,
+				(unsigned long)ti->cpt_restart.arg0, timespec_to_jiffies(&ctx->delta_time), (unsigned long)ctx->cpt_jiffies64);
+		}
+	}
+
+	if (ctx->image_version < CPT_VERSION_9)
+		current->it_real_incr = ti->cpt_it_real_incr;
+	else
+		current->it_real_incr = _ns_to_jiffies(ti->cpt_it_real_incr);
+	current->it_prof_incr = ti->cpt_it_prof_incr;
+	current->it_virt_incr = ti->cpt_it_virt_incr; 
+	current->it_prof_value = ti->cpt_it_prof_value;
+	current->it_virt_value = ti->cpt_it_virt_value;
+
+	err = rst_clone_children(tobj, ctx);
+	if (err) {
+		eprintk_ctx("rst_clone_children\n");
+		goto out;
+	}
+
+	if (exiting)
+		current->signal->group_exit = 1;
+
+	if (ti->cpt_pid == 1) {
+		if ((err = rst_process_linkage(ctx)) != 0) {
+			eprintk_ctx("rst_process_linkage: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_do_filejobs(ctx)) != 0) {
+			eprintk_ctx("rst_do_filejobs: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_eventpoll(ctx)) != 0) {
+			eprintk_ctx("rst_eventpoll: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_sockets_complete(ctx)) != 0) {
+			eprintk_ctx("rst_sockets_complete: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_stray_files(ctx)) != 0) {
+			eprintk_ctx("rst_stray_files: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_posix_locks(ctx)) != 0) {
+			eprintk_ctx("rst_posix_locks: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_tty_jobcontrol(ctx)) != 0) {
+			eprintk_ctx("rst_tty_jobcontrol: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_restore_fs(ctx)) != 0) {
+			eprintk_ctx("rst_restore_fs: %d\n", err);
+			goto out;
+		}
+		if (virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_RESTORE, ctx) & NOTIFY_FAIL) {
+			err = -ECHRNG;
+			eprintk_ctx("scp_restore failed\n");
+			goto out;
+		}
+	}
+
+out:
+	thr_ctx->error = err;
+	lock_kernel();
+	complete(&thr_ctx->task_done);
+
+	if (!err && (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
+		preempt_disable();
+		current->exit_state = EXIT_ZOMBIE;
+		write_lock_irq(&tasklist_lock);
+		nr_zombie++;
+		write_unlock_irq(&tasklist_lock);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+		atomic_dec(&current->signal->live);
+#endif
+		current->flags |= PF_DEAD;
+		if (!(ti->cpt_flags&PF_DEAD))
+			wprintk_ctx("zombie %d,%d(%s) is not pf_dead\n", current->pid, virt_pid(current), current->comm);
+		module_put(current->thread_info->exec_domain->module);
+		if (current->binfmt)
+			module_put(current->binfmt->module);
+	} else {
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+	}
+
+	schedule();
+
+	dprintk_ctx("leaked through %d/%d %p\n", current->pid, virt_pid(current), current->mm);
+
+	module_put(THIS_MODULE);
+	complete_and_exit(NULL, 0);
+	return 0;
+}
+
+#if 0
+static void set_task_ubs(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct task_beancounter *tbc;
+
+	tbc = task_bc(current);
+
+	put_beancounter(tbc->fork_sub);
+	tbc->fork_sub = rst_lookup_ubc(ti->cpt_task_ub, ctx);
+	if (ti->cpt_mm_ub != CPT_NULL) {
+		put_beancounter(tbc->exec_ub);
+		tbc->exec_ub = rst_lookup_ubc(ti->cpt_mm_ub, ctx);
+	}
+}
+#endif
+
+static int create_root_task(cpt_object_t *obj, struct cpt_context *ctx,
+		struct thr_context *thr_ctx)
+{
+	task_t *tsk;
+	int pid;
+
+	thr_ctx->ctx = ctx;
+	thr_ctx->error = 0;
+	init_completion(&thr_ctx->init_complete);
+	init_completion(&thr_ctx->task_done);
+#if 0
+	set_task_ubs(obj->o_image, ctx);
+#endif
+
+	pid = local_kernel_thread(hook, thr_ctx, 0, 0);
+	if (pid < 0)
+		return pid;
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_ve(pid);
+	if (tsk)
+		get_task_struct(tsk);
+	read_unlock(&tasklist_lock);
+	if (tsk == NULL)
+		return -ESRCH;
+	cpt_obj_setobj(obj, tsk, ctx);
+	thr_ctx->tobj = obj;
+	return 0;
+}
+
+static int rst_basic_init_task(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	task_t *tsk = obj->o_obj;
+	struct cpt_task_image *ti = obj->o_image;
+
+	memcpy(tsk->comm, ti->cpt_comm, sizeof(tsk->comm));
+	rst_mm_basic(obj, ti, ctx);
+	return 0;
+}
+
+static int make_baby(cpt_object_t *cobj,
+		     struct cpt_task_image *pi,
+		     struct cpt_context *ctx)
+{
+	unsigned long flags;
+	struct cpt_task_image *ci = cobj->o_image;
+	struct thr_context thr_ctx;
+	task_t *tsk;
+	pid_t pid;
+	struct fs_struct *tfs = NULL;
+
+	flags = rst_mm_flag(ci, ctx) | rst_files_flag(ci, ctx)
+		| rst_signal_flag(ci, ctx) | rst_semundo_flag(ci, ctx);
+	if (ci->cpt_rppid != pi->cpt_pid) {
+		flags |= CLONE_THREAD|CLONE_PARENT;
+		if (ci->cpt_signal != pi->cpt_signal ||
+		    !(flags&CLONE_SIGHAND) ||
+		    (!(flags&CLONE_VM) && pi->cpt_mm != CPT_NULL)) {
+			eprintk_ctx("something is wrong with threads: %d %d %d %Ld %Ld %08lx\n",
+			       (int)ci->cpt_pid, (int)ci->cpt_rppid, (int)pi->cpt_pid,
+			       ci->cpt_signal, pi->cpt_signal, flags
+			       );
+			return -EINVAL;
+		}
+	}
+
+	thr_ctx.ctx = ctx;
+	thr_ctx.error = 0;
+	init_completion(&thr_ctx.init_complete);
+	init_completion(&thr_ctx.task_done);
+	thr_ctx.tobj = cobj;
+
+#if 0
+	set_task_ubs(ci, ctx);
+#endif
+
+	if (current->fs == NULL) {
+		tfs = get_exec_env()->init_entry->fs;
+		if (tfs == NULL)
+			return -EINVAL;
+		atomic_inc(&tfs->count);
+		current->fs = tfs;
+	}
+	pid = local_kernel_thread(hook, &thr_ctx, flags, ci->cpt_pid);
+	if (tfs) {
+		current->fs = NULL;
+		atomic_dec(&tfs->count);
+	}
+	if (pid < 0)
+		return pid;
+
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_ve(pid);
+	if (tsk)
+		get_task_struct(tsk);
+	read_unlock(&tasklist_lock);
+	if (tsk == NULL)
+		return -ESRCH;
+	cpt_obj_setobj(cobj, tsk, ctx);
+	thr_ctx.tobj = cobj;
+	wait_for_completion(&thr_ctx.init_complete);
+#ifdef CONFIG_SMP
+	wait_task_inactive(cobj->o_obj);
+#endif
+	rst_basic_init_task(cobj, ctx);
+
+	/* clone() increases group_stop_count if it was not zero and
+	 * CLONE_THREAD was asked. Undo.
+	 */
+	if (current->signal->group_stop_count && (flags & CLONE_THREAD)) {
+		if (tsk->signal != current->signal) BUG();
+		current->signal->group_stop_count--;
+	}
+
+	wake_up_process(tsk);
+	wait_for_completion(&thr_ctx.task_done);
+	wait_task_inactive(tsk);
+
+	return thr_ctx.error;
+}
+
+static int rst_clone_children(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct cpt_task_image *ti = obj->o_image;
+	cpt_object_t *cobj;
+
+	for_each_object(cobj, CPT_OBJ_TASK) {
+		struct cpt_task_image *ci = cobj->o_image;
+		if (cobj == obj)
+			continue;
+		if ((ci->cpt_rppid == ti->cpt_pid && ci->cpt_tgid == ci->cpt_pid) ||
+		    (ci->cpt_leader == ti->cpt_pid &&
+		     ci->cpt_tgid != ci->cpt_pid && ci->cpt_pid != 1)) {
+			err = make_baby(cobj, ti, ctx);
+			if (err) {
+				eprintk_ctx("make_baby: %d\n", err);
+				return err;
+			}
+		}
+	}
+	return 0;
+}
+
+static int read_task_images(struct cpt_context *ctx)
+{
+	int err;
+	loff_t start, end;
+
+	err = rst_get_section(CPT_SECT_TASKS, ctx, &start, &end);
+	if (err)
+		return err;
+
+	while (start < end) {
+		cpt_object_t *obj;
+		struct cpt_task_image *ti = cpt_get_buf(ctx);
+
+		err = rst_get_object(CPT_OBJ_TASK, start, ti, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		if (ti->cpt_pid != 1 && !__is_virtual_pid(ti->cpt_pid)) {
+			eprintk_ctx("BUG: pid %d is not virtual\n", ti->cpt_pid);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
+		cpt_obj_setpos(obj, start, ctx);
+		intern_cpt_object(CPT_OBJ_TASK, obj, ctx);
+		obj->o_image = kmalloc(ti->cpt_next, GFP_KERNEL);
+		if (obj->o_image == NULL) {
+			cpt_release_buf(ctx);
+			return -ENOMEM;
+		}
+		memcpy(obj->o_image, ti, sizeof(*ti));
+		err = ctx->pread(obj->o_image + sizeof(*ti),
+				 ti->cpt_next - sizeof(*ti), ctx, start + sizeof(*ti));
+		cpt_release_buf(ctx);
+		if (err)
+			return err;
+		start += ti->cpt_next;
+	}
+	return 0;
+}
+
+
+static int vps_rst_restore_tree(struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	struct thr_context thr_ctx_root;
+
+	err = read_task_images(ctx);
+	if (err)
+		return err;
+
+	err = rst_undump_ubc(ctx);
+	if (err)
+		return err;
+
+	if (virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_RSTCHECK, ctx) & NOTIFY_FAIL)
+		return -ECHRNG;
+
+	err = rst_setup_pagein(ctx);
+	if (err)
+		return err;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		err = create_root_task(obj, ctx, &thr_ctx_root);
+		if (err)
+			return err;
+
+		wait_for_completion(&thr_ctx_root.init_complete);
+#ifdef CONFIG_SMP
+		wait_task_inactive(obj->o_obj);
+#endif
+		rst_basic_init_task(obj, ctx);
+
+		wake_up_process(obj->o_obj);
+		wait_for_completion(&thr_ctx_root.task_done);
+		wait_task_inactive(obj->o_obj);
+		err = thr_ctx_root.error;
+		if (err)
+			return err;
+		break;
+	}
+
+	return err;
+}
+
+int rst_read_vdso(struct cpt_context *ctx)
+{
+	int err;
+	loff_t start, end;
+	struct cpt_page_block *pgb;
+
+	ctx->vdso = NULL;
+	err = rst_get_section(CPT_SECT_VSYSCALL, ctx, &start, &end);
+	if (err)
+		return err;
+	if (start == CPT_NULL)
+		return 0;
+	if (end < start + sizeof(*pgb) + PAGE_SIZE)
+		return -EINVAL;
+
+	pgb = cpt_get_buf(ctx);
+	err = rst_get_object(CPT_OBJ_VSYSCALL, start, pgb, ctx);
+	if (err) {
+		goto err_buf;
+	}
+	ctx->vdso = (char*)__get_free_page(GFP_KERNEL);
+	if (ctx->vdso == NULL) {
+		err = -ENOMEM;
+		goto err_buf;
+	}
+	err = ctx->pread(ctx->vdso, PAGE_SIZE, ctx, start + sizeof(*pgb));
+	if (err)
+		goto err_page;
+	if (!memcmp(ctx->vdso, vsyscall_addr, PAGE_SIZE)) {
+		free_page((unsigned long)ctx->vdso);
+		ctx->vdso = NULL;
+	}
+
+	cpt_release_buf(ctx);
+	return 0;
+err_page:
+	free_page((unsigned long)ctx->vdso);
+	ctx->vdso = NULL;
+err_buf:
+	cpt_release_buf(ctx);
+	return err;
+}
+
+int vps_rst_undump(struct cpt_context *ctx)
+{
+	int err;
+	unsigned long umask;
+
+	err = rst_open_dumpfile(ctx);
+	if (err)
+		return err;
+
+#ifndef CONFIG_X86_64
+	if (ctx->tasks64) {
+		eprintk_ctx("Cannot restore 64 bit VE on this architecture\n");
+		return -EINVAL;
+	}
+#endif
+
+	umask = current->fs->umask;
+	current->fs->umask = 0;
+
+	err = rst_read_vdso(ctx);
+
+	if (err == 0)
+		err = vps_rst_restore_tree(ctx);
+
+	if (err == 0)
+		err = rst_restore_process(ctx);
+
+	if (err)
+		virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_RSTFAIL, ctx);
+
+	current->fs->umask = umask;
+
+	return err;
+}
+
+static int rst_unlock_ve(struct cpt_context *ctx)
+{
+	struct ve_struct *env;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+	down_write(&env->op_sem);
+	env->is_locked = 0;
+	up_write(&env->op_sem);
+	put_ve(env);
+	return 0;
+}
+
+int rst_resume(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	struct user_beancounter *bc;
+	int err = 0;
+
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+
+		fput(file);
+	}
+
+	bc = get_beancounter_byuid(ctx->ve_id, 0);
+	BUG_ON(!bc);
+	copy_one_ubparm(ctx->saved_ubc, bc->ub_parms, UB_KMEMSIZE);
+	copy_one_ubparm(ctx->saved_ubc, bc->ub_parms, UB_NUMPROC);
+	copy_one_ubparm(ctx->saved_ubc, bc->ub_parms, UB_NUMFILE);
+	copy_one_ubparm(ctx->saved_ubc, bc->ub_parms, UB_DCACHESIZE);
+	put_beancounter(bc);
+
+	rst_resume_network(ctx);
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		struct cpt_task_image *ti = obj->o_image;
+
+		if (!tsk)
+			continue;
+
+		if (ti->cpt_state == TASK_UNINTERRUPTIBLE) {
+			dprintk_ctx("task %d/%d(%s) is started\n", virt_pid(tsk), tsk->pid, tsk->comm);
+
+			/* Weird... If a signal is sent to stopped task,
+			 * nobody makes recalc_sigpending(). We have to do
+			 * this by hands after wake_up_process().
+			 * if we did this before a signal could arrive before
+			 * wake_up_process() and stall.
+			 */
+			spin_lock_irq(&tsk->sighand->siglock);
+			if (!signal_pending(tsk))
+				recalc_sigpending_tsk(tsk);
+			spin_unlock_irq(&tsk->sighand->siglock);
+
+			wake_up_process(tsk);
+		} else {
+			if (ti->cpt_state == TASK_STOPPED ||
+			    ti->cpt_state == TASK_TRACED) {
+				set_task_state(tsk, ti->cpt_state);
+			}
+		}
+		put_task_struct(tsk);
+	}
+
+	rst_unlock_ve(ctx);
+
+	rst_complete_pagein(ctx, 0);
+
+	rst_finish_ubc(ctx);
+	cpt_object_destroy(ctx);
+
+	return err;
+}
+
+int rst_kill(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err = 0;
+
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+
+		fput(file);
+	}
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+
+		if (tsk == NULL)
+			continue;
+
+		if (tsk->exit_state == 0) {
+			send_sig(SIGKILL, tsk, 1);
+
+			spin_lock_irq(&tsk->sighand->siglock);
+			sigfillset(&tsk->blocked);
+			sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
+			set_tsk_thread_flag(tsk, TIF_SIGPENDING);
+			clear_tsk_thread_flag(tsk, TIF_FREEZE);
+			if (tsk->flags & PF_FROZEN)
+				tsk->flags &= ~PF_FROZEN;
+			spin_unlock_irq(&tsk->sighand->siglock);
+
+			wake_up_process(tsk);
+		}
+
+		put_task_struct(tsk);
+	}
+
+	rst_complete_pagein(ctx, 1);
+
+	rst_finish_ubc(ctx);
+	cpt_object_destroy(ctx);
+
+	return err;
+}
+
+static int rst_utsname(cpt_context_t *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_UTSNAME];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_object_hdr o;
+	int i;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_UTSNAME || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	i = 0;
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int len;
+		char *ptr;
+		err = rst_get_object(CPT_OBJ_NAME, sec, &o, ctx);
+		if (err)
+			return err;
+		len = o.cpt_next - o.cpt_hdrlen;
+		if (len > __NEW_UTS_LEN+1)
+			return -ENAMETOOLONG;
+		switch (i) {
+		case 0:
+			ptr = ve_utsname.nodename; break;
+		case 1:
+			ptr = ve_utsname.domainname; break;
+		default:
+			return -EINVAL;
+		}
+		err = ctx->pread(ptr, len, ctx, sec+o.cpt_hdrlen);
+		if (err)
+			return err;
+		i++;
+		sec += o.cpt_next;
+	}
+
+	return 0;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/rst_x8664.S linux-2.6.9-ve023stab054/kernel/cpt/rst_x8664.S
--- linux-2.6.9-100.orig/kernel/cpt/rst_x8664.S	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/rst_x8664.S	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,64 @@
+#define ASSEMBLY 1
+	
+#include <linux/config.h>
+
+#undef CONFIG_DEBUG_INFO
+	
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/dwarf2.h>
+#include <asm/calling.h>
+#include <asm/offset.h>
+#include <asm/msr.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+#include <asm/hw_irq.h>
+#include <asm/errno.h>
+
+	.code64
+	.global schedule_tail_hook, schedule_tail_p
+	.align 8
+schedule_tail_hook:
+	movq schedule_tail_p(%rip),%r11
+	call *%r11
+	GET_THREAD_INFO(%rcx)
+	btr $22,threadinfo_flags(%rcx)	/* TIF_RESUME */
+	jc  1f
+	retq
+
+	/* If TIF_RESUME is set, (%rsp) is pointer to hook function
+	 * the hook will do the work and jump to the next hook,
+	 * everything should end at ret_from_fork+5.
+	 */
+1:	addq $8,%rsp
+	retq
+
+	.align 8
+	.global ret_from_fork2
+ret_from_fork2:
+	cmpq $0,ORIG_RAX(%rsp)
+	jge  ret_from_fork+5
+	RESTORE_REST
+	jmp  int_ret_from_sys_call	
+
+	.align 8
+	.global ret_last_siginfo
+ret_last_siginfo:
+	call rlsi
+	movq %rax,%rsp
+	retq
+
+	.align 8
+	.global ret_child_tid
+ret_child_tid:
+	movq %rsp,%rdi
+	call rct
+	movq %rax,%rsp
+	retq
+	
+	.data
+schedule_tail_p:
+	.quad	0
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/vzcpt.vzksyms linux-2.6.9-ve023stab054/kernel/cpt/vzcpt.vzksyms
--- linux-2.6.9-100.orig/kernel/cpt/vzcpt.vzksyms	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/vzcpt.vzksyms	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,7 @@
+handle_mm_fault
+add_to_swap_cache
+lru_cache_add_active
+swap_duplicate
+lookup_swap_cache
+read_swap_cache_async
+add_to_swap
diff -Nurap linux-2.6.9-100.orig/kernel/cpt/vzrst.vzksyms linux-2.6.9-ve023stab054/kernel/cpt/vzrst.vzksyms
--- linux-2.6.9-100.orig/kernel/cpt/vzrst.vzksyms	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/cpt/vzrst.vzksyms	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,10 @@
+free_swap_and_cache
+__add_to_swap_cache
+get_swap_page
+make_pages_present
+lru_cache_add_active
+add_to_swap_cache
+add_to_swap
+swap_duplicate
+swap_free
+syscall_exit
diff -Nurap linux-2.6.9-100.orig/kernel/cpu.c linux-2.6.9-ve023stab054/kernel/cpu.c
--- linux-2.6.9-100.orig/kernel/cpu.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/cpu.c	2011-06-15 19:26:22.000000000 +0400
@@ -11,7 +11,6 @@
 #include <linux/unistd.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
-#include <linux/kmod.h>		/* for hotplug_path */
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <asm/semaphore.h>
@@ -47,13 +46,18 @@ EXPORT_SYMBOL(unregister_cpu_notifier);
 extern int  __cpu_disable(void);
 extern void __cpu_die(unsigned int);
 
+
+#ifdef CONFIG_SCHED_VCPU
+#error "CONFIG_HOTPLUG_CPU isn't supported with CONFIG_SCHED_VCPU"
+#endif
+
 static inline void check_for_tasks(int cpu)
 {
 	struct task_struct *p;
 
 	write_lock_irq(&tasklist_lock);
-	for_each_process(p) {
-		if (task_cpu(p) == cpu && (p->utime != 0 || p->stime != 0))
+	for_each_process_all(p) {
+		if (task_pcpu(p) == cpu && (p->utime != 0 || p->stime != 0))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
 				(state = %ld, flags = %lx) \n",
 				 p->comm, p->pid, cpu, p->state, p->flags);
@@ -109,6 +113,13 @@ static int take_cpu_down(void *unused)
 	return err;
 }
 
+#ifdef CONFIG_SCHED_VCPU
+#error VCPU vs. HOTPLUG: fix hotplug code below
+/*
+ * What should be fixed:
+ * - check for if (idle_cpu()) yield()
+ */
+#endif
 int cpu_down(unsigned int cpu)
 {
 	int err;
@@ -203,6 +214,11 @@ int __devinit cpu_up(unsigned int cpu)
 		ret = -EINVAL;
 		goto out;
 	}
+
+	ret = vsched_init_default(cpu);
+	if (ret)
+		goto out;
+
 	ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
 	if (ret == NOTIFY_BAD) {
 		printk("%s: attempt to bring up CPU %u failed\n",
@@ -224,8 +240,10 @@ int __devinit cpu_up(unsigned int cpu)
 	cpu_run_sbin_hotplug(cpu, "online");
 
 out_notify:
-	if (ret != 0)
+	if (ret != 0) {
 		notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu);
+		vsched_fini_default(cpu);
+	}
 out:
 	up(&cpucontrol);
 	return ret;
diff -Nurap linux-2.6.9-100.orig/kernel/dump.c linux-2.6.9-ve023stab054/kernel/dump.c
--- linux-2.6.9-100.orig/kernel/dump.c	2011-06-09 19:22:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/dump.c	2011-06-15 19:26:19.000000000 +0400
@@ -88,7 +88,7 @@ static struct gendisk *device_to_gendisk
 	int rc;
 
 	/* trace symlink to "block" */
-	nd.mnt = mntget(sysfs_mount);
+	nd.mnt = mntget(visible_sysfs_mount);
 	nd.dentry = dget(dev->kobj.dentry);
 	nd.flags = LOOKUP_FOLLOW;
 	nd.last_type = LAST_ROOT;
diff -Nurap linux-2.6.9-100.orig/kernel/exit.c linux-2.6.9-ve023stab054/kernel/exit.c
--- linux-2.6.9-100.orig/kernel/exit.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/exit.c	2011-06-15 19:26:22.000000000 +0400
@@ -22,10 +22,12 @@
 #include <linux/binfmts.h>
 #include <linux/ptrace.h>
 #include <linux/profile.h>
+#include <linux/swap.h>
 #include <linux/mount.h>
 #include <linux/proc_fs.h>
 #include <linux/mempolicy.h>
 #include <linux/audit.h> /* for audit_free() */
+#include <linux/faudit.h>
 #include <linux/task_io_accounting_ops.h>
 
 #include <asm/uaccess.h>
@@ -33,6 +35,8 @@
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 
+#include <ub/ub_mem.h>
+
 extern void sem_exit (void);
 extern struct task_struct *child_reaper;
 
@@ -51,18 +55,19 @@ static void __unhash_process(struct task
 	}
 
 	REMOVE_LINKS(p);
+	REMOVE_VE_LINKS(p);
 }
 
 void release_task(struct task_struct * p)
 {
 	int zap_leader;
 	task_t *leader;
-	struct dentry *proc_dentry;
+	struct dentry *proc_dentry[2];
 
 repeat: 
 	atomic_dec(&p->user->processes);
 	spin_lock(&p->proc_lock);
-	proc_dentry = proc_pid_unhash(p);
+	proc_pid_unhash(p, proc_dentry);
 	write_lock_irq(&tasklist_lock);
 	if (unlikely(p->ptrace))
 		__ptrace_unlink(p);
@@ -70,6 +75,8 @@ repeat: 
 	__exit_signal(p);
 	__exit_sighand(p);
 	__unhash_process(p);
+	nr_zombie--;
+	atomic_inc(&nr_dead);
 
 	/*
 	 * If we are the last non-leader member of the thread
@@ -97,6 +104,8 @@ repeat: 
 	spin_unlock(&p->proc_lock);
 	proc_pid_flush(proc_dentry);
 	release_thread(p);
+	if (atomic_dec_and_test(&VE_TASK_INFO(p)->owner_env->pcounter))
+		do_env_cleanup(VE_TASK_INFO(p)->owner_env);
 	put_task_struct(p);
 
 	p = leader;
@@ -108,10 +117,10 @@ repeat: 
 
 void unhash_process(struct task_struct *p)
 {
-	struct dentry *proc_dentry;
+	struct dentry *proc_dentry[2];
 
 	spin_lock(&p->proc_lock);
-	proc_dentry = proc_pid_unhash(p);
+	proc_pid_unhash(p, proc_dentry);
 	write_lock_irq(&tasklist_lock);
 	__unhash_process(p);
 	write_unlock_irq(&tasklist_lock);
@@ -129,14 +138,16 @@ int session_of_pgrp(int pgrp)
 	struct task_struct *p;
 	int sid = -1;
 
+	WARN_ON(is_virtual_pid(pgrp));
+
 	read_lock(&tasklist_lock);
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		if (p->signal->session > 0) {
 			sid = p->signal->session;
 			goto out;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
-	p = find_task_by_pid(pgrp);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
+	p = find_task_by_pid_ve(pgrp);
 	if (p)
 		sid = p->signal->session;
 out:
@@ -158,17 +169,19 @@ static int will_become_orphaned_pgrp(int
 	struct task_struct *p;
 	int ret = 1;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	WARN_ON(is_virtual_pid(pgrp));
+
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		if (p == ignored_task
 				|| p->exit_state >= EXIT_ZOMBIE
-				|| p->real_parent->pid == 1)
+				|| virt_pid(p->real_parent) == 1)
 			continue;
 		if (process_group(p->real_parent) != pgrp
 			    && p->real_parent->signal->session == p->signal->session) {
 			ret = 0;
 			break;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
 	return ret;	/* (sighing) "Often!" */
 }
 
@@ -176,6 +189,8 @@ int is_orphaned_pgrp(int pgrp)
 {
 	int retval;
 
+	WARN_ON(is_virtual_pid(pgrp));
+
 	read_lock(&tasklist_lock);
 	retval = will_become_orphaned_pgrp(pgrp, NULL);
 	read_unlock(&tasklist_lock);
@@ -188,7 +203,9 @@ static inline int has_stopped_jobs(int p
 	int retval = 0;
 	struct task_struct *p;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	WARN_ON(is_virtual_pid(pgrp));
+
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		if (p->state != TASK_STOPPED)
 			continue;
 
@@ -204,7 +221,7 @@ static inline int has_stopped_jobs(int p
 
 		retval = 1;
 		break;
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
 	return retval;
 }
 
@@ -269,6 +286,7 @@ void set_special_pids(pid_t session, pid
 	__set_special_pids(session, pgrp);
 	write_unlock_irq(&tasklist_lock);
 }
+EXPORT_SYMBOL(set_special_pids);
 
 /*
  * Let kernel threads use this to say that they
@@ -495,6 +513,10 @@ static inline void __exit_mm(struct task
 	mm_release(tsk, mm);
 	if (!mm)
 		return;
+
+	if (test_tsk_thread_flag(tsk, TIF_MEMDIE))
+		mm->oom_killed = 1;
+
 	/*
 	 * Serialize with any possible pending coredump.
 	 * We must hold mmap_sem around checking core_waiters
@@ -528,6 +550,7 @@ void exit_mm(struct task_struct *tsk)
 {
 	__exit_mm(tsk);
 }
+EXPORT_SYMBOL(exit_mm);
 
 static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
 {
@@ -613,13 +636,12 @@ static inline void reparent_thread(task_
 static inline void forget_original_parent(struct task_struct * father,
 					  struct list_head *to_release)
 {
-	struct task_struct *p, *reaper = father;
+	struct task_struct *p, *tsk_reaper, *reaper = father;
 	struct list_head *_p, *_n;
 
 	do {
 		reaper = next_thread(reaper);
 		if (reaper == father) {
-			reaper = child_reaper;
 			break;
 		}
 	} while (reaper->exit_state >= EXIT_ZOMBIE);
@@ -646,9 +668,16 @@ static inline void forget_original_paren
 		/* if father isn't the real parent, then ptrace must be enabled */
 		BUG_ON(father != p->real_parent && !ptrace);
 
+		tsk_reaper = reaper;
+		if (tsk_reaper == father)
+#ifdef CONFIG_VE
+			tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
+		if (tsk_reaper == p)
+#endif
+			tsk_reaper = child_reaper;
 		if (father == p->real_parent) {
-			/* reparent with a reaper, real father it's us */
-			choose_new_parent(p, reaper, child_reaper);
+			/* reparent with a tsk_reaper, real father it's us */
+			choose_new_parent(p, tsk_reaper, child_reaper);
 			reparent_thread(p, father, 0);
 		} else {
 			/* reparent ptraced task to its real parent */
@@ -673,7 +702,14 @@ static inline void forget_original_paren
 		if (p->first_time_slice == father->pid)
 			p->first_time_slice = 0;
 
-		choose_new_parent(p, reaper, child_reaper);
+		tsk_reaper = reaper;
+		if (tsk_reaper == father)
+#ifdef CONFIG_VE
+			tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
+		if (tsk_reaper == p)
+#endif
+			tsk_reaper = child_reaper;
+		choose_new_parent(p, tsk_reaper, child_reaper);
 		reparent_thread(p, father, 1);
 	}
 }
@@ -765,6 +801,9 @@ static void exit_notify(struct task_stru
 	     tsk->self_exec_id != tsk->parent_exec_id))
 		tsk->exit_signal = SIGCHLD;
 
+	if (tsk->exit_signal != -1 && t == child_reaper)
+		/* We dont want people slaying init. */
+		tsk->exit_signal = SIGCHLD;
 
 	/* If something other than our normal parent is ptracing us, then
 	 * send it a SIGCHLD instead of honoring exit_signal.  exit_signal
@@ -783,6 +822,7 @@ static void exit_notify(struct task_stru
 	     unlikely(tsk->parent->signal->group_exit)))
 		state = EXIT_DEAD;
 	tsk->exit_state = state;
+	nr_zombie++;
 
 	/*
 	 * Clear these here so that update_process_times() won't try to deliver
@@ -809,19 +849,104 @@ static void exit_notify(struct task_stru
 	tsk->flags |= PF_DEAD;
 }
 
+asmlinkage long sys_wait4(pid_t pid, int * stat_addr, int options, struct rusage * ru);
+
+#ifdef CONFIG_VE
+/*
+ * Handle exitting of init process, it's a special case for VE.
+ */
+static void do_initproc_exit(void)
+{
+	struct task_struct *tsk;
+	struct ve_struct *env;
+	struct siginfo info;
+	struct task_struct *g, *p;
+	long delay = 1L;
+
+	tsk = current;
+	env = VE_TASK_INFO(current)->owner_env;
+	if (env->init_entry != tsk)
+		return;
+
+	if (ve_is_super(env) && tsk->pid == 1)
+		panic("Attempted to kill init!");
+
+	memset(&info, 0, sizeof(info));
+	info.si_errno = 0;
+	info.si_code = SI_KERNEL;
+	info.si_pid = virt_pid(tsk);
+	info.si_uid = current->uid;
+	info.si_signo = SIGKILL;
+
+	/*
+	 * Here the VE changes its state into "not running".
+	 * op_sem taken for write is a barrier to all VE manipulations from
+	 * ioctl: it waits for operations currently in progress and blocks all
+	 * subsequent operations until is_running is set to 0 and op_sem is
+	 * released.
+	 */
+	down_write(&env->op_sem);
+	env->is_running = 0;
+	up_write(&env->op_sem);
+
+	/* send kill to all processes of VE */
+	read_lock(&tasklist_lock);
+	do_each_thread_ve(g, p) {
+		force_sig_info(SIGKILL, &info, p);
+	} while_each_thread_ve(g, p);
+	read_unlock(&tasklist_lock);
+
+	/* wait for all init childs exit */
+	while (atomic_read(&env->pcounter) > 1) {
+		if (sys_wait4(-1, NULL, __WALL | WNOHANG, NULL) > 0)
+			continue;
+		/* it was ENOCHLD or no more children somehow */
+		if (atomic_read(&env->pcounter) == 1)
+			break;
+
+		/* clear all signals to avoid wakeups */
+		if (signal_pending(tsk))
+			flush_signals(tsk);
+		/* we have child without signal sent */
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(delay);
+		delay = (delay < HZ) ? (delay << 1) : HZ;
+		read_lock(&tasklist_lock);
+		do_each_thread_ve(g, p) {
+			if (p != tsk)
+				force_sig_info(SIGKILL, &info, p);
+		} while_each_thread_ve(g, p);
+		read_unlock(&tasklist_lock);
+	}
+	env->init_entry = child_reaper;
+	write_lock_irq(&tasklist_lock);
+	REMOVE_LINKS(tsk);
+	tsk->parent = tsk->real_parent = child_reaper;
+	SET_LINKS(tsk);
+	write_unlock_irq(&tasklist_lock);
+}
+#endif
+
 asmlinkage NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
+	struct mm_struct *mm;
 
+	mm = tsk->mm;
 	profile_task_exit(tsk);
 
 	if (unlikely(in_interrupt()))
 		panic("Aiee, killing interrupt handler!");
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
+#ifndef CONFIG_VE
 	if (unlikely(tsk->pid == 1))
 		panic("Attempted to kill init!");
+#else
+	do_initproc_exit();
+#endif
+	virtinfo_gencall(VIRTINFO_DOEXIT, NULL);
 
 	/*
 	 * If do_exit is called because this processes oopsed, it's possible
@@ -837,7 +962,9 @@ asmlinkage NORET_TYPE void do_exit(long 
 
 	if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
 		current->ptrace_message = code;
+		set_pn_state(current, PN_STOP_EXIT);
 		ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
+		clear_pn_state(current);
 	}
 
 	tsk->flags |= PF_EXITING;
@@ -881,6 +1008,7 @@ asmlinkage NORET_TYPE void do_exit(long 
 
 	tsk->exit_code = code;
 	exit_notify(tsk);
+
 #ifdef CONFIG_NUMA
 	mpol_free(tsk->mempolicy);
 	tsk->mempolicy = NULL;
@@ -905,19 +1033,22 @@ EXPORT_SYMBOL(complete_and_exit);
 
 asmlinkage long sys_exit(int error_code)
 {
+	virtinfo_notifier_call(VITYPE_FAUDIT,
+			VIRTINFO_FAUDIT_EXIT, &error_code);
 	do_exit((error_code&0xff)<<8);
 }
 
 task_t fastcall *next_thread(const task_t *p)
 {
+	task_t *tsk;
 #ifdef CONFIG_SMP
-	if (!p->sighand)
-		BUG();
-	if (!spin_is_locked(&p->sighand->siglock) &&
-				!rwlock_is_locked(&tasklist_lock))
+	if (!rwlock_is_locked(&tasklist_lock) || p->pids[PIDTYPE_TGID].nr == 0)
 		BUG();
 #endif
-	return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
+	tsk = pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
+	/* all threads should belong to ONE ve! */
+	BUG_ON(VE_TASK_INFO(tsk)->owner_env != VE_TASK_INFO(p)->owner_env);
+	return tsk;
 }
 
 EXPORT_SYMBOL(next_thread);
@@ -967,14 +1098,19 @@ asmlinkage void sys_exit_group(int error
 static int eligible_child(pid_t pid, int options, task_t *p)
 {
 	if (pid > 0) {
-		if (p->pid != pid)
+		if ((is_virtual_pid(pid) ? virt_pid(p) : p->pid) != pid)
 			return 0;
 	} else if (!pid) {
 		if (process_group(p) != process_group(current))
 			return 0;
 	} else if (pid != -1) {
-		if (process_group(p) != -pid)
-			return 0;
+		if (__is_virtual_pid(-pid)) {
+			if (virt_pgid(p) != -pid)
+				return 0;
+		} else {
+			if (process_group(p) != -pid)
+				return 0;
+		}
 	}
 
 	/*
@@ -1044,7 +1180,7 @@ static int wait_task_zombie(task_t *p, i
 	int status;
 
 	if (unlikely(noreap)) {
-		pid_t pid = p->pid;
+		pid_t pid = get_task_pid(p);
 		uid_t uid = p->uid;
 		int exit_code = p->exit_code;
 		int why, status;
@@ -1151,7 +1287,7 @@ static int wait_task_zombie(task_t *p, i
 			retval = put_user(status, &infop->si_status);
 	}
 	if (!retval && infop)
-		retval = put_user(p->pid, &infop->si_pid);
+		retval = put_user(get_task_pid(p), &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(p->uid, &infop->si_uid);
 	if (retval) {
@@ -1159,7 +1295,7 @@ static int wait_task_zombie(task_t *p, i
 		p->exit_state = EXIT_ZOMBIE;
 		return retval;
 	}
-	retval = p->pid;
+	retval = get_task_pid(p);
 	if (p->real_parent != p->parent) {
 		write_lock_irq(&tasklist_lock);
 		/* Double-check with lock held.  */
@@ -1219,7 +1355,7 @@ static int wait_task_stopped(task_t *p, 
 	read_unlock(&tasklist_lock);
 
 	if (unlikely(noreap)) {
-		pid_t pid = p->pid;
+		pid_t pid = get_task_pid(p);
 		uid_t uid = p->uid;
 		int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
 
@@ -1289,11 +1425,11 @@ bail_ref:
 	if (!retval && infop)
 		retval = put_user(exit_code, &infop->si_status);
 	if (!retval && infop)
-		retval = put_user(p->pid, &infop->si_pid);
+		retval = put_user(get_task_pid(p), &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(p->uid, &infop->si_uid);
 	if (!retval)
-		retval = p->pid;
+		retval = get_task_pid(p);
 	put_task_struct(p);
 
 	BUG_ON(!retval);
@@ -1329,7 +1465,7 @@ static int wait_task_continued(task_t *p
 		p->signal->stop_state = 0;
 	spin_unlock_irq(&p->sighand->siglock);
 
-	pid = p->pid;
+	pid = get_task_pid(p);
 	uid = p->uid;
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
@@ -1340,7 +1476,7 @@ static int wait_task_continued(task_t *p
 		if (!retval && stat_addr)
 			retval = put_user(0xffff, stat_addr);
 		if (!retval)
-			retval = p->pid;
+			retval = get_task_pid(p);
 	} else {
 		retval = wait_noreap_copyout(p, pid, uid,
 					     CLD_CONTINUED, SIGCONT,
@@ -1574,6 +1710,7 @@ asmlinkage long sys_wait4(pid_t pid, int
 	prevent_tail_call(ret);
 	return ret;
 }
+EXPORT_SYMBOL(sys_wait4);
 
 #ifdef __ARCH_WANT_SYS_WAITPID
 
diff -Nurap linux-2.6.9-100.orig/kernel/extable.c linux-2.6.9-ve023stab054/kernel/extable.c
--- linux-2.6.9-100.orig/kernel/extable.c	2004-10-19 01:53:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/extable.c	2011-06-15 19:26:19.000000000 +0400
@@ -49,6 +49,7 @@ static int core_kernel_text(unsigned lon
 	if (addr >= (unsigned long)_sinittext &&
 	    addr <= (unsigned long)_einittext)
 		return 1;
+
 	return 0;
 }
 
diff -Nurap linux-2.6.9-100.orig/kernel/fairsched.c linux-2.6.9-ve023stab054/kernel/fairsched.c
--- linux-2.6.9-100.orig/kernel/fairsched.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/fairsched.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,1381 @@
+/*
+ * Fair Scheduler
+ *
+ * Copyright (C) 2000-2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * Start-tag scheduling follows the theory presented in
+ * http://www.cs.utexas.edu/users/dmcl/papers/ps/SIGCOMM96.ps
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <asm/timex.h>
+#include <asm/atomic.h>
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/dcache.h>
+#include <linux/sysctl.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/console.h>
+#include <linux/fairsched.h>
+#include <linux/vsched.h>
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_USER_RESOURCE
+#include <ub/ub_mem.h>
+#else
+#define ub_vmalloc	vmalloc
+#endif
+
+/* we need it for vsched routines in sched.c */
+spinlock_t fairsched_lock = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_FAIRSCHED
+
+#define FAIRSHED_DEBUG		" debug"
+
+
+/*********************************************************************/
+/*
+ * Special arithmetics
+ */
+/*********************************************************************/
+
+#define CYCLES_SHIFT (8)
+#define SCYCLES_TIME(time) \
+        ((scycles_t) {((time) + (1 << CYCLES_SHIFT) - 1)  >> CYCLES_SHIFT})
+
+#define CYCLES_ZERO (0)
+static inline int CYCLES_BEFORE(cycles_t x, cycles_t y)
+{
+        return (__s64)(x-y) < 0;
+}
+static inline int CYCLES_AFTER(cycles_t x, cycles_t y)
+{
+        return (__s64)(y-x) < 0;
+}
+static inline void CYCLES_DADD(cycles_t *x, fschdur_t y) {*x+=y.d;}
+
+#define FSCHDUR_ZERO (0)
+#define TICK_DUR ((fschdur_t){cycles_per_jiffy})
+static inline fschdur_t FSCHDURATION(cycles_t x, cycles_t y)
+{
+	return (fschdur_t){x - y};
+}
+static inline int FSCHDUR_CMP(fschdur_t x, fschdur_t y)
+{
+	if (x.d < y.d) return -1;
+	if (x.d > y.d) return 1;
+	return 0;
+}
+static inline fschdur_t FSCHDUR_SUB(fschdur_t x, fschdur_t y)
+{
+	return (fschdur_t){x.d - y.d};
+}
+
+#define FSCHTAG_ZERO ((fschtag_t){0})
+static inline int FSCHTAG_CMP(fschtag_t x, fschtag_t y)
+{
+	if (x.t < y.t) return -1;
+	if (x.t > y.t) return 1;
+	return 0;
+}
+static inline fschtag_t FSCHTAG_MAX(fschtag_t x, fschtag_t y)
+{
+	return x.t >= y.t ? x : y;
+}
+static inline int FSCHTAG_DADD(fschtag_t *tag, fschdur_t dur, unsigned w)
+{
+	cycles_t new_tag;
+	new_tag = tag->t + (cycles_t)dur.d * w;
+	if (new_tag < tag->t)
+		return -1;
+	/* DEBUG */
+	if (new_tag >= (1ULL << 48))
+		return -1;
+	tag->t = new_tag;
+	return 0;
+}
+static inline int FSCHTAG_ADD(fschtag_t *tag, fschtag_t y)
+{
+	cycles_t new_tag;
+	new_tag = tag->t + y.t;
+	if (new_tag < tag->t)
+		return -1;
+	tag->t = new_tag;
+	return 0;
+}
+static inline fschtag_t FSCHTAG_SUB(fschtag_t x, fschtag_t y)
+{
+	return (fschtag_t){x.t - y.t};
+}
+
+#define FSCHVALUE_FMT "%Lu"
+#define FSCHVALUE_PRINT(x) ((x).v)
+#define FSCHVALUE_ZERO ((fschvalue_t){0})
+#define TICK_VALUE ((fschvalue_t){(cycles_t)cycles_per_jiffy << FSCHRATE_SHIFT})
+static inline fschvalue_t FSCHVALUE(unsigned long t)
+{
+	return (fschvalue_t){(cycles_t)t << FSCHRATE_SHIFT};
+}
+static inline int FSCHVALUE_CMP(fschvalue_t x, fschvalue_t y)
+{
+	if (x.v < y.v) return -1;
+	if (x.v > y.v) return 1;
+	return 0;
+}
+static inline void FSCHVALUE_DADD(fschvalue_t *val, fschdur_t dur,
+		unsigned rate)
+{
+	val->v += (cycles_t)dur.d * rate;
+}
+static inline fschvalue_t FSCHVALUE_SUB(fschvalue_t x, fschvalue_t y)
+{
+	return (fschvalue_t){x.v - y.v};
+}
+static inline cycles_t FSCHVALUE_TO_DELAY(fschvalue_t val, unsigned rate)
+{
+	unsigned long t;
+	/*
+	 * Here we lose precision to make the division 32-bit on IA-32.
+	 * The value is not greater than TICK_VALUE.
+	 * (TICK_VALUE >> FSCHRATE_SHIFT) fits unsigned long.
+	 */
+	t = (val.v + (1 << FSCHRATE_SHIFT) - 1) >> FSCHRATE_SHIFT;
+	return (cycles_t)((t + rate - 1) / rate) << FSCHRATE_SHIFT;
+}
+
+
+/*********************************************************************/
+/*
+ * Global data
+ */
+/*********************************************************************/
+
+/*
+ * Assertions.
+ * Called with preemption disabled.
+ */
+
+#define fsch_assert(x)							\
+	do {								\
+		static int count;					\
+		if (x)							\
+			break;						\
+		if (count++ > 10)					\
+			break;						\
+		__printk_no_wake++;					\
+		printk("fsch_assert " #x " failed\n");			\
+		__printk_no_wake--;					\
+	} while (0)
+
+#define fsch_validate(x, fmt...)					\
+	do {								\
+		static int count;					\
+		if (x)							\
+			break;						\
+		if (count++ > 10)					\
+			break;						\
+		__printk_no_wake++;					\
+		printk("fsch_assert " #x " failed\n");			\
+		printk("fsch_assert: " fmt);				\
+		__printk_no_wake--;					\
+	} while (0)
+
+/*
+ * Configurable parameters
+ */
+unsigned fairsched_max_latency = 25; /* jiffies */
+
+/*
+ * Parameters initialized at startup
+ */
+/* Number of online CPUs */
+unsigned fairsched_nr_cpus;
+/* Token Bucket depth (burst size) */
+static fschvalue_t max_value;
+
+struct fairsched_node fairsched_init_node = {
+	.id		= INT_MAX,
+#ifdef CONFIG_VE
+	.owner_env	= get_ve0(),
+#endif
+	.weight		= 1,
+};
+EXPORT_SYMBOL(fairsched_init_node);
+
+struct fairsched_node fairsched_idle_node = {
+	.id =			-1,
+};
+
+static int fairsched_nr_nodes;
+static LIST_HEAD(fairsched_node_head);
+static LIST_HEAD(fairsched_running_head);
+static LIST_HEAD(fairsched_delayed_head);
+
+DEFINE_PER_CPU(cycles_t, prev_schedule);
+static fschtag_t max_latency;
+
+static DECLARE_MUTEX(fairsched_mutex);
+
+/*********************************************************************/
+/*
+ * Small helper routines
+ */
+/*********************************************************************/
+
+/* this didn't proved to be very valuable statistics... */
+#define fairsched_inc_ve_strv(node, cycles)  do {} while(0)
+#define fairsched_dec_ve_strv(node, cycles)  do {} while(0)
+
+/*********************************************************************/
+/*
+ * Runlist management
+ */
+/*********************************************************************/
+
+/*
+ * Returns the start_tag of the first runnable node, or 0.
+ */
+static inline fschtag_t virtual_time(void)
+{
+	struct fairsched_node *p;
+
+	if (!list_empty(&fairsched_running_head)) {
+		p = list_first_entry(&fairsched_running_head,
+				struct fairsched_node, runlist);
+		return p->start_tag;
+	}
+	return FSCHTAG_ZERO;
+}
+
+static void fairsched_recompute_max_latency(void)
+{
+	struct fairsched_node *p;
+	unsigned w;
+	fschtag_t tag;
+
+	w = FSCHWEIGHT_MAX;
+	list_for_each_entry(p, &fairsched_node_head, nodelist) {
+		if (p->weight < w)
+			w = p->weight;
+	}
+	tag = FSCHTAG_ZERO;
+	(void) FSCHTAG_DADD(&tag, TICK_DUR,
+				fairsched_nr_cpus * fairsched_max_latency * w);
+	max_latency = tag;
+}
+
+static void fairsched_reset_start_tags(void)
+{
+	struct fairsched_node *cnode;
+	fschtag_t min_tag;
+
+	min_tag = virtual_time();
+	list_for_each_entry(cnode, &fairsched_node_head, nodelist) {
+		if (FSCHTAG_CMP(cnode->start_tag, min_tag) > 0)
+			cnode->start_tag = FSCHTAG_SUB(cnode->start_tag,
+						       min_tag);
+		else
+			cnode->start_tag = FSCHTAG_ZERO;
+	}
+}
+
+static void fairsched_running_insert(struct fairsched_node *node)
+{
+	struct list_head *tmp;
+	struct fairsched_node *p;
+	fschtag_t start_tag_max;
+
+	if (!list_empty(&fairsched_running_head)) {
+		start_tag_max = virtual_time();
+		if (!FSCHTAG_ADD(&start_tag_max, max_latency) &&
+		    FSCHTAG_CMP(start_tag_max, node->start_tag) < 0)
+			node->start_tag = start_tag_max;
+	}
+
+	list_for_each(tmp, &fairsched_running_head) {
+		p = list_entry(tmp, struct fairsched_node, runlist);
+		if (FSCHTAG_CMP(node->start_tag, p->start_tag) <= 0)
+			break;
+	}
+	/* insert node just before tmp */
+	list_add_tail(&node->runlist, tmp);
+}
+
+static inline void fairsched_running_insert_fromsleep(
+		struct fairsched_node *node)
+{
+	node->start_tag = FSCHTAG_MAX(node->start_tag, virtual_time());
+	fairsched_running_insert(node);
+}
+
+
+/*********************************************************************/
+/*
+ * CPU limiting helper functions
+ *
+ * These functions compute rates, delays and manipulate with sleep
+ * lists and so on.
+ */
+/*********************************************************************/
+
+/*
+ * Insert a node into the list of nodes removed from scheduling,
+ * sorted by the time at which the the node is allowed to run,
+ * historically called `delay'.
+ */
+static void fairsched_delayed_insert(struct fairsched_node *node)
+{
+	struct fairsched_node *p;
+	struct list_head *tmp;
+
+	list_for_each(tmp, &fairsched_delayed_head) {
+		p = list_entry(tmp, struct fairsched_node,
+				   runlist);
+		if (CYCLES_AFTER(p->delay, node->delay))
+			break;
+	}
+        /* insert node just before tmp */
+	list_add_tail(&node->runlist, tmp);
+}
+
+static inline void nodevalue_add(struct fairsched_node *node,
+		fschdur_t duration, unsigned rate)
+{
+	FSCHVALUE_DADD(&node->value, duration, rate);
+	if (FSCHVALUE_CMP(node->value, max_value) > 0)
+		node->value = max_value;
+}
+
+/*
+ * The node has been selected to run.
+ * This function accounts in advance for the time that the node will run.
+ * The advance not used by the node will be credited back.
+ */
+static void fairsched_ratelimit_charge_advance(
+		struct fairsched_node *node,
+		cycles_t time)
+{
+	fsch_assert(!node->delayed);
+	fsch_validate(FSCHVALUE_CMP(node->value, TICK_VALUE) >= 0,
+			"charge, value " FSCHVALUE_FMT
+			", tick " FSCHVALUE_FMT
+			", delay %Lu, time %Lu"
+			", lastupd %Lu, rate %u\n",
+			FSCHVALUE_PRINT(node->value),
+			FSCHVALUE_PRINT(TICK_VALUE),
+			node->delay, time,
+			node->last_updated_at, node->rate);
+
+	/*
+	 * Account for the time passed since last update.
+	 * It might be needed if the node has become runnable because of
+	 * a wakeup, but hasn't gone through other functions updating
+	 * the bucket value.
+	 */
+	if (CYCLES_AFTER(time, node->last_updated_at)) {
+		nodevalue_add(node, FSCHDURATION(time, node->last_updated_at),
+			      node->rate);
+		node->last_updated_at = time;
+	}
+
+	/* charge for the full tick the node might be running */
+	node->value = FSCHVALUE_SUB(node->value, TICK_VALUE);
+	if (FSCHVALUE_CMP(node->value, TICK_VALUE) < 0) {
+		list_del(&node->runlist);
+		node->delayed = 1;
+		node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY(
+					FSCHVALUE_SUB(TICK_VALUE, node->value),
+					node->rate);
+		node->nr_ready = 0;
+		fairsched_delayed_insert(node);
+	}
+}
+
+static void fairsched_ratelimit_credit_unused(
+		struct fairsched_node *node,
+		cycles_t time, fschdur_t duration)
+{
+	/* account for the time passed since last update */
+	if (CYCLES_AFTER(time, node->last_updated_at)) {
+		nodevalue_add(node, FSCHDURATION(time, node->last_updated_at),
+			      node->rate);
+		node->last_updated_at = time;
+	}
+
+	/*
+	 * When the node was given this CPU, it was charged for 1 tick.
+	 * Credit back the unused time.
+	 */
+	if (FSCHDUR_CMP(duration, TICK_DUR) < 0)
+		nodevalue_add(node, FSCHDUR_SUB(TICK_DUR, duration),
+			      1 << FSCHRATE_SHIFT);
+
+	/* check if the node is allowed to run */
+	if (FSCHVALUE_CMP(node->value, TICK_VALUE) < 0) {
+		/*
+		 * The node was delayed and remain such.
+		 * But since the bucket value has been updated,
+		 * update the delay time and move the node in the list.
+		 */
+		fsch_assert(node->delayed);
+		node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY(
+					FSCHVALUE_SUB(TICK_VALUE, node->value),
+					node->rate);
+	} else if (node->delayed) {
+		/*
+		 * The node was delayed, but now it is allowed to run.
+		 * We do not manipulate with lists, it will be done by the
+		 * caller.
+		 */
+		node->nr_ready = node->nr_runnable;
+		node->delayed = 0;
+	}
+}
+
+static void fairsched_delayed_wake(cycles_t time)
+{
+	struct fairsched_node *p;
+
+	while (!list_empty(&fairsched_delayed_head)) {
+		p = list_entry(fairsched_delayed_head.next,
+				  struct fairsched_node,
+				  runlist);
+		if (CYCLES_AFTER(p->delay, time))
+			break;
+
+		/* ok, the delay period is completed */
+		/* account for the time passed since last update */
+		if (CYCLES_AFTER(time, p->last_updated_at)) {
+			nodevalue_add(p, FSCHDURATION(time, p->last_updated_at),
+					p->rate);
+			p->last_updated_at = time;
+		}
+
+		fsch_validate(FSCHVALUE_CMP(p->value, TICK_VALUE) >= 0,
+				"wake, value " FSCHVALUE_FMT
+				", tick " FSCHVALUE_FMT
+				", delay %Lu, time %Lu"
+				", lastupd %Lu, rate %u\n",
+				FSCHVALUE_PRINT(p->value),
+				FSCHVALUE_PRINT(TICK_VALUE),
+				p->delay, time,
+				p->last_updated_at, p->rate);
+		p->nr_ready = p->nr_runnable;
+		p->delayed = 0;
+		list_del_init(&p->runlist);
+		if (p->nr_ready)
+			fairsched_running_insert_fromsleep(p);
+	}
+}
+
+static struct fairsched_node *fairsched_find(unsigned int id);
+
+void fairsched_cpu_online_map(int id, cpumask_t *mask)
+{
+	struct fairsched_node *node;
+
+	down(&fairsched_mutex);
+	node = fairsched_find(id);
+	if (node == NULL)
+		*mask = CPU_MASK_NONE;
+	else
+		vsched_cpu_online_map(node->vsched, mask);
+	up(&fairsched_mutex);
+}
+
+
+/*********************************************************************/
+/*
+ * The heart of the algorithm:
+ * fairsched_incrun, fairsched_decrun, fairsched_schedule
+ *
+ * Note: old property nr_ready >= nr_pcpu doesn't hold anymore.
+ * However, nr_runnable, nr_ready and delayed are maintained in sync.
+ */
+/*********************************************************************/
+
+/*
+ * Called on a wakeup inside the node.
+ */
+void fairsched_incrun(struct fairsched_node *node)
+{
+	if (!node->delayed && !node->nr_ready++)
+		/* the node wasn't on the running list, insert */
+		fairsched_running_insert_fromsleep(node);
+	node->nr_runnable++;
+}
+
+/*
+ * Called from inside schedule() when a sleeping state is entered.
+ */
+void fairsched_decrun(struct fairsched_node *node)
+{
+	if (!node->delayed && !--node->nr_ready)
+		/* nr_ready changed 1->0, remove from the running list */
+		list_del_init(&node->runlist);
+	--node->nr_runnable;
+}
+
+void fairsched_inccpu(struct fairsched_node *node)
+{
+	node->nr_pcpu++;
+	fairsched_dec_ve_strv(node, cycles);
+}
+
+static inline void __fairsched_deccpu(struct fairsched_node *node)
+{
+	node->nr_pcpu--;
+	fairsched_inc_ve_strv(node, cycles);
+}
+
+void fairsched_deccpu(struct fairsched_node *node)
+{
+	if (node == &fairsched_idle_node)
+		return;
+
+	__fairsched_deccpu(node);
+}
+
+static void fairsched_account(struct fairsched_node *node,
+		cycles_t time)
+{
+	fschdur_t duration;
+
+	duration = FSCHDURATION(time, __get_cpu_var(prev_schedule));
+#ifdef CONFIG_VE
+	CYCLES_DADD(&node->owner_env->cpu_used_ve, duration);
+#endif
+
+	/*
+	 * The duration is not greater than TICK_DUR since
+	 * task->need_resched is always 1.
+	 */
+	if (FSCHTAG_DADD(&node->start_tag, duration, node->weight)) {
+		fairsched_reset_start_tags();
+		(void) FSCHTAG_DADD(&node->start_tag, duration,
+					node->weight);
+	}
+
+	list_del_init(&node->runlist);
+	if (node->rate_limited)
+		fairsched_ratelimit_credit_unused(node, time, duration);
+	if (!node->delayed) {
+		if (node->nr_ready)
+			fairsched_running_insert(node);
+	} else
+		fairsched_delayed_insert(node);
+}
+
+/*
+ * Scheduling decision
+ *
+ * Updates CPU usage for the node releasing the CPU and selects a new node.
+ */
+struct fairsched_node *fairsched_schedule(
+		struct fairsched_node *prev_node,
+		struct fairsched_node *cur_node,
+		int cur_node_active,
+		cycles_t time)
+{
+	struct fairsched_node *p;
+
+	if (prev_node != &fairsched_idle_node)
+		fairsched_account(prev_node, time);
+	__get_cpu_var(prev_schedule) = time;
+
+	fairsched_delayed_wake(time);
+
+	list_for_each_entry(p, &fairsched_running_head, runlist) {
+		if (p->nr_pcpu < p->nr_ready ||
+		    (cur_node_active && p == cur_node)) {
+			if (p->rate_limited)
+				fairsched_ratelimit_charge_advance(p, time);
+			return p;
+		}
+	}
+	return NULL;
+}
+
+
+/*********************************************************************/
+/*
+ * System calls 
+ *
+ * All do_xxx functions are called under fairsched semaphore and after
+ * capability check.
+ *
+ * The binary interfaces follow some other Fair Scheduler implementations
+ * (although some system call arguments are not needed for our implementation).
+ */
+/*********************************************************************/
+
+static struct fairsched_node *fairsched_find(unsigned int id)
+{
+	struct fairsched_node *p;
+
+	list_for_each_entry(p, &fairsched_node_head, nodelist) {
+		if (p->id == id)
+			return p;
+	}
+	return NULL;
+}
+
+static int do_fairsched_mknod(unsigned int parent, unsigned int weight,
+		unsigned int newid)
+{
+	struct fairsched_node *node;
+	int retval;
+
+	retval = -EINVAL;
+	if (weight < 1 || weight > FSCHWEIGHT_MAX)
+		goto out;
+	if (newid < 0 || newid > INT_MAX)
+		goto out;
+
+	retval = -EBUSY;
+	if (fairsched_find(newid) != NULL)
+		goto out;
+
+	retval = -ENOMEM;
+	node = kmalloc(sizeof(*node), GFP_KERNEL);
+	if (node == NULL)
+		goto out;
+
+	memset(node, 0, sizeof(*node));
+	node->weight = weight;
+	INIT_LIST_HEAD(&node->runlist);
+	node->id = newid;
+	node->vcpus = 0;
+#ifdef CONFIG_VE
+	node->owner_env = get_exec_env();
+#endif
+
+	spin_lock_irq(&fairsched_lock);
+	list_add(&node->nodelist, &fairsched_node_head);
+	fairsched_nr_nodes++;
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+
+	retval = newid;
+out:
+	return retval;
+}
+
+asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
+				    unsigned int newid)
+{
+	int retval;
+
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	down(&fairsched_mutex);
+	retval = do_fairsched_mknod(parent, weight, newid);
+	up(&fairsched_mutex);
+
+	return retval;
+}
+EXPORT_SYMBOL(sys_fairsched_mknod);
+
+static int do_fairsched_rmnod(unsigned int id)
+{
+	struct fairsched_node *node;
+	int retval;
+
+	retval = -EINVAL;
+	node = fairsched_find(id);
+	if (node == NULL)
+		goto out;
+	if (node == &fairsched_init_node)
+		goto out;
+
+	retval = vsched_destroy(node->vsched);
+	if (retval)
+		goto out;
+
+	spin_lock_irq(&fairsched_lock);
+	list_del(&node->runlist); /* required for delayed nodes */
+	list_del(&node->nodelist);
+	fairsched_nr_nodes--;
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+
+	kfree(node);
+	retval = 0;
+out:
+	return retval;
+}
+
+asmlinkage int sys_fairsched_rmnod(unsigned int id)
+{
+	int retval;
+
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	down(&fairsched_mutex);
+	retval = do_fairsched_rmnod(id);
+	up(&fairsched_mutex);
+
+	return retval;
+}
+EXPORT_SYMBOL(sys_fairsched_rmnod);
+
+int do_fairsched_chwt(unsigned int id, unsigned weight)
+{
+	struct fairsched_node *node;
+
+	if (id == 0)
+		return -EINVAL;
+	if (weight < 1 || weight > FSCHWEIGHT_MAX)
+		return -EINVAL;
+
+	node = fairsched_find(id);
+	if (node == NULL)
+		return -ENOENT;
+
+	spin_lock_irq(&fairsched_lock);
+	node->weight = weight;
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+
+	return 0;
+}
+
+asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned weight)
+{
+	int retval;
+
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	down(&fairsched_mutex);
+	retval = do_fairsched_chwt(id, weight);
+	up(&fairsched_mutex);
+
+	return retval;
+}
+
+int do_fairsched_vcpus(unsigned int id, unsigned int vcpus)
+{
+	struct fairsched_node *node;
+	int ret = 0;
+
+	if (id == 0)
+		return -EINVAL;
+
+	node = fairsched_find(id);
+	if (node == NULL)
+		return -ENOENT;
+
+	if (vcpus < 1 || vcpus > num_online_cpus())
+		vcpus = num_online_cpus();
+
+	node->vcpus = vcpus;
+	if (node->vsched != NULL) {
+		ret = vsched_set_vcpus(node->vsched, vcpus);
+		/* FIXME: adjust rate ... */
+	}
+
+	return ret;
+}
+
+asmlinkage int sys_fairsched_vcpus(unsigned int id, unsigned int vcpus)
+{
+	int retval;
+
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	down(&fairsched_mutex);
+	retval = do_fairsched_vcpus(id, vcpus);
+	up(&fairsched_mutex);
+
+	return retval;
+}
+EXPORT_SYMBOL(sys_fairsched_vcpus);
+
+int do_fairsched_rate(unsigned int id, int op, unsigned rate)
+{
+	struct fairsched_node *node;
+	cycles_t time;
+	int retval;
+
+	if (id == 0)
+		return -EINVAL;
+	if (op == 0 && (rate < 1 || rate >= (1UL << 31)))
+		return -EINVAL;
+
+	node = fairsched_find(id);
+	if (node == NULL)
+		return -ENOENT;
+
+	retval = -EINVAL;
+	spin_lock_irq(&fairsched_lock);
+	time = get_cycles();
+	switch (op) {
+		case 0:
+			node->rate = rate;
+			if (node->rate > (fairsched_nr_cpus << FSCHRATE_SHIFT))
+				node->rate =
+					fairsched_nr_cpus << FSCHRATE_SHIFT;
+			node->rate_limited = 1;
+			node->value = max_value;
+			if (node->delayed) {
+				list_del(&node->runlist);
+				node->delay = time;
+				fairsched_delayed_insert(node);
+				node->last_updated_at = time;
+				fairsched_delayed_wake(time);
+			}
+			retval = node->rate;
+			break;
+		case 1:
+			node->rate = 0; /* This assignment is not needed
+					   for the kernel code, and it should
+					   not rely on rate being 0 when it's
+					   unset.  This is a band-aid for some
+					   existing tools (don't know which one
+					   exactly).  --SAW */
+			node->rate_limited = 0;
+			node->value = max_value;
+			if (node->delayed) {
+				list_del(&node->runlist);
+				node->delay = time;
+				fairsched_delayed_insert(node);
+				node->last_updated_at = time;
+				fairsched_delayed_wake(time);
+			}
+			retval = 0;
+			break;
+		case 2:
+			if (node->rate_limited)
+				retval = node->rate;
+			else
+				retval = -ENODATA;
+			break;
+	}
+	spin_unlock_irq(&fairsched_lock);
+
+	return retval;
+}
+
+asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate)
+{
+	int retval;
+
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	down(&fairsched_mutex);
+	retval = do_fairsched_rate(id, op, rate);
+	up(&fairsched_mutex);
+
+	return retval;
+}
+
+/*
+ * Called under fairsched_mutex.
+ */
+static int __do_fairsched_mvpr(struct task_struct *p,
+		struct fairsched_node *node)
+{
+	int retval;
+
+	if (node->vsched == NULL) {
+		retval = vsched_create(node->id, node);
+		if (retval < 0)
+			return retval;
+	}
+
+	/* no need to destroy vsched in case of mvpr failure */
+	return vsched_mvpr(p, node->vsched);
+}
+
+int do_fairsched_mvpr(pid_t pid, unsigned int nodeid)
+{
+	struct task_struct *p;
+	struct fairsched_node *node;
+	int retval;
+
+	retval = -ENOENT;
+	node = fairsched_find(nodeid);
+	if (node == NULL)
+		goto out;
+
+	read_lock(&tasklist_lock);
+	retval = -ESRCH;
+	p = find_task_by_pid_all(pid);
+	if (p == NULL)
+		goto out_unlock;
+	get_task_struct(p);
+	read_unlock(&tasklist_lock);
+
+	retval = __do_fairsched_mvpr(p, node);
+	put_task_struct(p);
+	return retval;
+
+out_unlock:
+	read_unlock(&tasklist_lock);
+out:
+	return retval;
+}
+
+asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid)
+{
+	int retval;
+
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	down(&fairsched_mutex);
+	retval = do_fairsched_mvpr(pid, nodeid);
+	up(&fairsched_mutex);
+
+	return retval;
+}
+EXPORT_SYMBOL(sys_fairsched_mvpr);
+
+
+/*********************************************************************/
+/*
+ * proc interface
+ */
+/*********************************************************************/
+
+struct fairsched_node_dump {
+#ifdef CONFIG_VE
+	envid_t veid;
+#endif
+	int id;
+	unsigned weight;
+	unsigned rate;
+	unsigned rate_limited : 1,
+		 delayed : 1;
+	fschtag_t start_tag;
+	fschvalue_t value;
+	cycles_t delay;
+	int nr_ready;
+	int nr_runnable;
+	int nr_pcpu;
+	int nr_tasks, nr_runtasks;
+};
+
+struct fairsched_dump {
+	int len, compat;
+	struct fairsched_node_dump nodes[0];
+};
+
+static struct fairsched_dump *fairsched_do_dump(int compat)
+{
+	int nr_nodes;
+	int len, i;
+	struct fairsched_dump *dump;
+	struct fairsched_node *node;
+	struct fairsched_node_dump *p;
+	unsigned long flags;
+
+start:
+	nr_nodes = (ve_is_super(get_exec_env()) ? fairsched_nr_nodes + 16 : 1);
+	len = sizeof(*dump) + nr_nodes * sizeof(dump->nodes[0]);
+	dump = ub_vmalloc(len);
+	if (dump == NULL)
+		goto out;
+
+	spin_lock_irqsave(&fairsched_lock, flags);
+	if (ve_is_super(get_exec_env()) && nr_nodes < fairsched_nr_nodes)
+		goto repeat;
+	p = dump->nodes;
+	list_for_each_entry_reverse(node, &fairsched_node_head, nodelist) {
+		if ((char *)p - (char *)dump >= len)
+			break;
+		p->nr_tasks = 0;
+		p->nr_runtasks = 0;
+#ifdef CONFIG_VE
+		if (!ve_accessible(node->owner_env, get_exec_env()))
+			continue;
+		p->veid = node->owner_env->veid;
+		if (compat) {
+			p->nr_tasks = atomic_read(&node->owner_env->pcounter);
+			for (i = 0; i < NR_CPUS; i++)
+				p->nr_runtasks +=
+					VE_CPU_STATS(node->owner_env, i)
+								->nr_running;
+			if (p->nr_runtasks < 0)
+				p->nr_runtasks = 0;
+		}
+#endif
+		p->id = node->id;
+		p->weight = node->weight;
+		p->rate = node->rate;
+		p->rate_limited = node->rate_limited;
+		p->delayed = node->delayed;
+		p->start_tag = node->start_tag;
+		p->value = node->value;
+		p->delay = node->delay;
+		p->nr_ready = node->nr_ready;
+		p->nr_runnable = node->nr_runnable;
+		p->nr_pcpu = node->nr_pcpu;
+		p++;
+	}
+	dump->len = p - dump->nodes;
+	dump->compat = compat;
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+
+out:
+	return dump;
+
+repeat:
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+	vfree(dump);
+	goto start;
+}
+
+#define FAIRSCHED_PROC_HEADLINES 2
+
+#if defined(CONFIG_VE)
+/*
+ * File format is dictated by compatibility reasons.
+ */
+static int fairsched_seq_show(struct seq_file *m, void *v)
+{
+	struct fairsched_dump *dump;
+	struct fairsched_node_dump *p;
+	unsigned vid, nid, pid, r;
+
+	dump = m->private;
+	p = (struct fairsched_node_dump *)((unsigned long)v & ~3UL);
+	if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
+		if (p == dump->nodes)
+			seq_printf(m, "Version: 2.6 debug\n");
+		else if (p == dump->nodes + 1)
+			seq_printf(m,
+				       "      veid "
+				       "        id "
+				       "    parent "
+				       "weight "
+				       " rate "
+  				       "tasks "
+				       "  run "
+				       "cpus"
+				       " "
+				       "flg "
+				       "ready "
+				       "           start_tag "
+				       "               value "
+				       "               delay"
+				       "\n");
+	} else {
+		p -= FAIRSCHED_PROC_HEADLINES;
+		vid = nid = pid = 0;
+		r = (unsigned long)v & 3;
+		if (p == dump->nodes) {
+			if (r == 2)
+				nid = p->id;
+		} else {
+			if (!r)
+				nid = p->id;
+			else if (r == 1)
+				vid = pid = p->id;
+			else
+				vid = p->id, nid = 1;
+		}
+		seq_printf(m,
+			       "%10u "
+			       "%10u %10u %6u %5u %5u %5u %4u"
+			       " "
+			       " %c%c %5u %20Lu %20Lu %20Lu"
+			       "\n",
+			       vid,
+			       nid,
+			       pid,
+			       p->weight,
+			       p->rate,
+			       p->nr_tasks,
+			       p->nr_runtasks,
+			       p->nr_pcpu,
+			       p->rate_limited ? 'L' : '.',
+			       p->delayed ? 'D' : '.',
+			       p->nr_ready,
+			       p->start_tag.t,
+			       p->value.v,
+			       p->delay
+			       );
+	}
+
+	return 0;
+}
+
+static void *fairsched_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct fairsched_dump *dump;
+	unsigned long l;
+
+	dump = m->private;
+	if (*pos >= dump->len * 3 - 1 + FAIRSCHED_PROC_HEADLINES)
+		return NULL;
+	if (*pos < FAIRSCHED_PROC_HEADLINES)
+		return dump->nodes + *pos;
+	/* guess why... */
+	l = (unsigned long)(dump->nodes +
+		((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) / 3);
+	l |= ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) % 3;
+	return (void *)l;
+}
+static void *fairsched_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return fairsched_seq_start(m, pos);
+}
+#endif
+
+static int fairsched2_seq_show(struct seq_file *m, void *v)
+{
+	struct fairsched_dump *dump;
+	struct fairsched_node_dump *p;
+
+	dump = m->private;
+	p = v;
+	if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
+		if (p == dump->nodes)
+			seq_printf(m, "Version: 2.7" FAIRSHED_DEBUG "\n");
+		else if (p == dump->nodes + 1)
+			seq_printf(m,
+				       "        id "
+				       "weight "
+				       " rate "
+				       "  run "
+				       "cpus"
+#ifdef FAIRSHED_DEBUG
+				       " "
+				       "flg "
+				       "ready "
+				       "           start_tag "
+				       "               value "
+				       "               delay"
+#endif
+				       "\n");
+	} else {
+		p -= FAIRSCHED_PROC_HEADLINES;
+		seq_printf(m,
+			       "%10u %6u %5u %5u %4u"
+#ifdef FAIRSHED_DEBUG
+			       " "
+			       " %c%c %5u %20Lu %20Lu %20Lu"
+#endif
+			       "\n",
+			       p->id,
+			       p->weight,
+			       p->rate,
+			       p->nr_runnable,
+			       p->nr_pcpu
+#ifdef FAIRSHED_DEBUG
+			       ,
+			       p->rate_limited ? 'L' : '.',
+			       p->delayed ? 'D' : '.',
+			       p->nr_ready,
+			       p->start_tag.t,
+			       p->value.v,
+			       p->delay
+#endif
+			       );
+	}
+
+	return 0;
+}
+
+static void *fairsched2_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct fairsched_dump *dump;
+
+	dump = m->private;
+	if (*pos >= dump->len + FAIRSCHED_PROC_HEADLINES)
+		return NULL;
+	return dump->nodes + *pos;
+}
+static void *fairsched2_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return fairsched2_seq_start(m, pos);
+}
+static void fairsched2_seq_stop(struct seq_file *m, void *v)
+{
+}
+
+#ifdef CONFIG_VE
+static struct seq_operations fairsched_seq_op = {
+	.start		= fairsched_seq_start,
+	.next		= fairsched_seq_next,
+	.stop		= fairsched2_seq_stop,
+	.show		= fairsched_seq_show
+};
+#endif
+static struct seq_operations fairsched2_seq_op = {
+	.start		= fairsched2_seq_start,
+	.next		= fairsched2_seq_next,
+	.stop		= fairsched2_seq_stop,
+	.show		= fairsched2_seq_show
+};
+static int fairsched_seq_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	struct seq_file *m;
+	int compat;
+
+#ifdef CONFIG_VE
+	compat = (file->f_dentry->d_name.len == sizeof("fairsched") - 1);
+	ret = seq_open(file, compat ? &fairsched_seq_op : &fairsched2_seq_op);
+#else
+	compat = 0;
+	ret = seq_open(file, fairsched2_seq_op);
+#endif
+	if (ret)
+		return ret;
+	m = file->private_data;
+	m->private = fairsched_do_dump(compat);
+	if (m->private == NULL) {
+		seq_release(inode, file);
+		ret = -ENOMEM;
+	}
+	return ret;
+}
+static int fairsched_seq_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	struct fairsched_dump *dump;
+
+	m = file->private_data;
+	dump = m->private;
+	m->private = NULL;
+	vfree(dump);
+	seq_release(inode, file);
+	return 0;
+}
+static struct file_operations proc_fairsched_operations = {
+	.open		= fairsched_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= fairsched_seq_release
+};
+
+
+/*********************************************************************/
+/*
+ * Fairsched initialization
+ */
+/*********************************************************************/
+
+int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp,
+			void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = ctl->data;
+	int val = *valp;
+	int ret;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	if (!write || *valp == val)
+		return ret;
+
+	spin_lock_irq(&fairsched_lock);
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+	return ret;
+}
+
+static void fairsched_calibrate(void)
+{
+	fairsched_nr_cpus = num_online_cpus();
+	max_value = FSCHVALUE(cycles_per_jiffy * (fairsched_nr_cpus + 1));
+}
+
+void __init fairsched_init_early(void)
+{
+	printk(KERN_INFO "Virtuozzo Fair CPU scheduler\n");
+	fairsched_init_node.vcpus = num_online_cpus();
+	list_add(&fairsched_init_node.nodelist, &fairsched_node_head);
+	fairsched_nr_nodes++;
+}
+
+/*
+ * Note: this function is executed late in the initialization sequence.
+ * We ourselves need calibrated cycles and initialized procfs...
+ * The consequence of this late initialization is that start tags are
+ * effectively ignored and each node preempts others on insertion.
+ * But it isn't a problem (only init node can be runnable).
+ */
+void __init fairsched_init_late(void)
+{
+	struct proc_dir_entry *entry;
+
+	if (get_cycles() == 0)
+		panic("FAIRSCHED: no TSC!\n");
+	fairsched_calibrate();
+	fairsched_recompute_max_latency();
+
+	entry = create_proc_glob_entry("fairsched", S_IRUGO, NULL);
+	if (entry)
+		entry->proc_fops = &proc_fairsched_operations;
+	entry = create_proc_glob_entry("fairsched2", S_IRUGO, NULL);
+	if (entry)
+		entry->proc_fops = &proc_fairsched_operations;
+}
+
+
+#else /* CONFIG_FAIRSCHED */
+
+
+/*********************************************************************/
+/*
+ * No Fairsched
+ */
+/*********************************************************************/
+
+asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
+				    unsigned int newid)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_rmnod(unsigned int id)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned int weight)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_vcpus(unsigned int id, unsigned int vcpus)
+{
+	return -ENOSYS;
+}
+
+void __init fairsched_init_late(void)
+{
+}
+
+#endif /* CONFIG_FAIRSCHED */
diff -Nurap linux-2.6.9-100.orig/kernel/fork.c linux-2.6.9-ve023stab054/kernel/fork.c
--- linux-2.6.9-100.orig/kernel/fork.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/fork.c	2011-06-15 19:26:22.000000000 +0400
@@ -20,6 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/completion.h>
 #include <linux/namespace.h>
+#include <linux/file.h>
 #include <linux/personality.h>
 #include <linux/mempolicy.h>
 #include <linux/sem.h>
@@ -27,6 +28,7 @@
 #include <linux/key.h>
 #include <linux/binfmts.h>
 #include <linux/mman.h>
+#include <linux/virtinfo.h>
 #include <linux/fs.h>
 #include <linux/cpu.h>
 #include <linux/security.h>
@@ -41,6 +43,7 @@
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/hash.h>
+#include <linux/fairsched.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -49,10 +52,15 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_misc.h>
+#include <ub/ub_vmpages.h>
+#include <ub/ub_mem.h>
+
 /* The idle threads do not count..
  * Protected by write_lock_irq(&tasklist_lock)
  */
 int nr_threads;
+EXPORT_SYMBOL(nr_threads);
 
 int max_threads;
 unsigned long total_forks;	/* Handle normal Linux uptimes. */
@@ -93,6 +101,7 @@ static kmem_cache_t *task_struct_cachep;
 
 void free_task(struct task_struct *tsk)
 {
+	ub_task_uncharge(tsk);
 	kfree(task_aux(tsk));
 	free_thread_info(tsk->thread_info);
 	free_task_struct(tsk);
@@ -109,9 +118,16 @@ void __put_task_struct(struct task_struc
 	free_uid(tsk->user);
 	put_group_info(tsk->group_info);
 
+#ifdef CONFIG_VE
+	put_ve(VE_TASK_INFO(tsk)->owner_env);
+	/* can't be protected by tasklist_lock, #74029 */
+	atomic_dec(&nr_dead);
+#endif
+
 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
 }
+EXPORT_SYMBOL(__put_task_struct);
 
 void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
 {
@@ -250,7 +266,7 @@ void __init fork_init(unsigned long memp
 	/* create a slab on which task_structs can be allocated */
 	task_struct_cachep =
 		kmem_cache_create("task_struct", sizeof(struct task_struct),
-			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
+			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_UBC, NULL, NULL);
 #endif
 
 	/*
@@ -282,17 +298,12 @@ static struct task_struct *dup_task_stru
 		return NULL;
 
 	ti = alloc_thread_info(tsk);
-	if (!ti) {
-		free_task_struct(tsk);
-		return NULL;
-	}
+	if (ti == NULL)
+		goto out_free_task;
 
-	aux = kmalloc(sizeof(*aux), GFP_KERNEL);
-	if (!aux) {
-		free_thread_info(ti);
-		free_task_struct(tsk);
-		return NULL;
-	}
+	aux = ub_kmalloc(sizeof(*aux), GFP_KERNEL);
+	if (aux == NULL)
+		goto out_free_thread;
 
 	*ti = *orig->thread_info;
 	*aux = *task_aux(orig);
@@ -301,9 +312,24 @@ static struct task_struct *dup_task_stru
 	ti->task = tsk;
 	task_aux(tsk) = aux;
 
+	/* Our parent has been killed by OOM killer... Go away */
+	if (test_tsk_thread_flag(tsk, TIF_MEMDIE))
+		goto out_free_aux;
+
+	if (ub_task_charge(orig, tsk) < 0)
+		goto out_free_aux;
+
 	/* One for us, one for whoever does the "release_task()" (usually parent) */
 	atomic_set(&tsk->usage,2);
 	return tsk;
+
+out_free_aux:
+	kfree(aux);
+out_free_thread:
+	free_thread_info(ti);
+out_free_task:
+	free_task_struct(tsk);
+	return NULL;
 }
 
 /* Must be called with the mm_flags_lock held.  */
@@ -436,9 +462,14 @@ static inline int dup_mmap(struct mm_str
 		if (mpnt->vm_flags & VM_ACCOUNT) {
 			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
 			if (security_vm_enough_memory(len))
-				goto fail_nomem;
+				goto fail_nocharge;
 			charge = len;
 		}
+
+		if (ub_privvm_charge(mm_ub(mm), mpnt->vm_flags, mpnt->vm_file,
+					mpnt->vm_end - mpnt->vm_start))
+			goto fail_nocharge;
+
 		tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 		if (!tmp)
 			goto fail_nomem;
@@ -502,6 +533,9 @@ out:
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
+	ub_privvm_uncharge(mm_ub(mm), mpnt->vm_flags, mpnt->vm_file,
+			mpnt->vm_end - mpnt->vm_start);
+fail_nocharge:
 	retval = -ENOMEM;
 	vm_unacct_memory(charge);
 	goto out;
@@ -528,12 +562,15 @@ static inline void mm_free_pgd(struct mm
 spinlock_t mmlist_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 int mmlist_nr;
 
+EXPORT_SYMBOL(mmlist_lock);
+
 #define allocate_mm()	(kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
 #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
 
 #include <linux/init_task.h>
 
-static struct mm_struct * mm_init(struct mm_struct * mm)
+static struct mm_struct * mm_init(struct mm_struct * mm,
+		struct user_beancounter * ub)
 {
 	unsigned long mm_flags;
 
@@ -546,6 +583,9 @@ static struct mm_struct * mm_init(struct
 	mm->ioctx_list = NULL;
 	mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
+#ifdef CONFIG_USER_RESOURCE
+	mm_ub(mm) = get_beancounter(ub);
+#endif
 
 	mm_flags = get_mm_flags(current->mm);
 	if (mm_flags != MMF_DUMP_FILTER_DEFAULT) {
@@ -561,6 +601,7 @@ static struct mm_struct * mm_init(struct
 	if (mm_flags != MMF_DUMP_FILTER_DEFAULT)
 		free_mm_flags(mm);
 fail_nomem:
+	put_beancounter(mm_ub(mm));
 	free_mm(mm);
 	return NULL;
 }
@@ -575,10 +616,11 @@ struct mm_struct * mm_alloc(void)
 	mm = allocate_mm();
 	if (mm) {
 		memset(mm, 0, sizeof(*mm));
-		mm = mm_init(mm);
+		mm = mm_init(mm, get_exec_ub());
 	}
 	return mm;
 }
+EXPORT_SYMBOL(mm_alloc);
 
 /*
  * Called when the last reference to the mm
@@ -591,8 +633,10 @@ void fastcall __mmdrop(struct mm_struct 
 	free_mm_flags(mm);
 	mm_free_pgd(mm);
 	destroy_context(mm);
+	put_beancounter(mm_ub(mm));
 	free_mm(mm);
 }
+EXPORT_SYMBOL(__mmdrop);
 
 /*
  * Decrement the use count and release all resources for an mm.
@@ -606,6 +650,19 @@ void mmput(struct mm_struct *mm)
 		exit_aio(mm);
 		exit_mmap(mm);
 		put_swap_token(mm);
+		(void) virtinfo_gencall(VIRTINFO_EXITMMAP, mm);
+		if (mm->oom_killed) {
+			/* In order to allow OOM to happen from now on */
+			spin_lock(&oom_generation_lock);
+			oom_kill_counter = 0;
+			oom_generation++;
+			printk("OOM killed process %s (pid=%d, ve=%d) (mm=%p) "
+			       "exited, free=%u gen=%d.\n",
+			       current->comm, current->pid,
+			       VEID(VE_TASK_INFO(current)->owner_env),
+			       mm, nr_free_pages(), oom_generation);
+			spin_unlock(&oom_generation_lock);
+		}
 		mmdrop(mm);
 	}
 }
@@ -730,7 +787,7 @@ static int copy_mm(unsigned long clone_f
 
 	/* Copy the current MM stuff.. */
 	memcpy(mm, oldmm, sizeof(*mm));
-	if (!mm_init(mm))
+	if (!mm_init(mm, get_task_ub(tsk)))
 		goto fail_nomem;
 
 	if (init_new_context(tsk,mm))
@@ -757,6 +814,7 @@ fail_nocontext:
 	 */
 	free_mm_flags(mm);
 	mm_free_pgd(mm);
+	put_beancounter(mm_ub(mm));
 	free_mm(mm);
 	return retval;
 }
@@ -1047,7 +1105,7 @@ asmlinkage long sys_set_tid_address(int 
 {
 	current->clear_child_tid = tidptr;
 
-	return current->pid;
+	return virt_pid(current);
 }
 
 /*
@@ -1058,13 +1116,13 @@ asmlinkage long sys_set_tid_address(int 
  * parts of the process environment (as per the clone
  * flags). The actual kick-off is left to the caller.
  */
-static task_t *copy_process(unsigned long clone_flags,
+task_t *copy_process(unsigned long clone_flags,
 				 unsigned long stack_start,
 				 struct pt_regs *regs,
 				 unsigned long stack_size,
 				 int __user *parent_tidptr,
 				 int __user *child_tidptr,
-				 int pid)
+				 long pid, long vpid)
 {
 	int retval;
 	struct task_struct *p = NULL;
@@ -1101,7 +1159,7 @@ static task_t *copy_process(unsigned lon
 	if (atomic_read(&p->user->processes) >=
 			p->rlim[RLIMIT_NPROC].rlim_cur) {
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
-				p->user != &root_user)
+				p->user->uid != 0)
 			goto bad_fork_free;
 	}
 
@@ -1126,12 +1184,24 @@ static task_t *copy_process(unsigned lon
 	p->did_exec = 0;
 	copy_flags(clone_flags, p);
 	p->pid = pid;
+	if (pid == 0)
+		set_virt_pid(p, 0);
+#ifdef CONFIG_VE
+	else {
+		set_virt_pid(p, alloc_vpid(p->pid, vpid ? : -1));
+		if (virt_pid(p) < 0)
+  			goto bad_fork_cleanup;
+	}
+#endif
 	retval = -EFAULT;
 	if (clone_flags & CLONE_PARENT_SETTID)
-		if (put_user(p->pid, parent_tidptr))
+		if (put_user(virt_pid(p), parent_tidptr))
 			goto bad_fork_cleanup;
 
 	p->proc_dentry = NULL;
+#ifdef CONFIG_VE
+	VE_TASK_INFO(p)->glob_proc_dentry = NULL;
+#endif
 
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
@@ -1216,6 +1286,9 @@ static task_t *copy_process(unsigned lon
 	 * We dont wake it up yet.
 	 */
 	p->tgid = p->pid;
+	set_virt_tgid(p, virt_pid(p));
+	set_virt_pgid(p, virt_pgid(current));
+	set_virt_sid(p, virt_sid(current));
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->ptrace_children);
 	INIT_LIST_HEAD(&p->ptrace_list);
@@ -1231,7 +1304,7 @@ static task_t *copy_process(unsigned lon
 	 * the parent's CPU. This avoids alot of nasty races.
 	 */
 	p->cpus_allowed = current->cpus_allowed;
-	set_task_cpu(p, smp_processor_id());
+	set_task_cpu(p, task_cpu(current));
 
 	/*
 	 * Check for pending SIGKILL! The new thread should not be allowed
@@ -1267,6 +1340,7 @@ static task_t *copy_process(unsigned lon
 			goto bad_fork_cleanup_namespace;
 		}
 		p->tgid = current->tgid;
+		set_virt_tgid(p, virt_tgid(current));
 		p->group_leader = current->group_leader;
 
 		if (current->signal->group_stop_count > 0) {
@@ -1284,6 +1358,13 @@ static task_t *copy_process(unsigned lon
 	if (unlikely(p->ptrace & PT_PTRACED))
 		__ptrace_link(p, current->parent);
 
+#ifdef CONFIG_VE
+	SET_VE_LINKS(p);
+	atomic_inc(&VE_TASK_INFO(p)->owner_env->pcounter);
+	get_ve(VE_TASK_INFO(p)->owner_env);
+	seqcount_init(&VE_TASK_INFO(p)->wakeup_lock);
+#endif
+
 	if (thread_group_leader(p)) {
 		attach_pid(p, PIDTYPE_PGID, process_group(p));
 		attach_pid(p, PIDTYPE_SID, p->signal->session);
@@ -1334,6 +1415,10 @@ bad_fork_cleanup_policy:
 bad_fork_cleanup:
 	if (p->binfmt)
 		module_put(p->binfmt->module);
+#ifdef CONFIG_VE
+	if (virt_pid(p) != p->pid && virt_pid(p) > 0)
+		free_vpid(virt_pid(p), get_exec_env());
+#endif
 bad_fork_cleanup_put_domain:
 	module_put(p->thread_info->exec_domain->module);
 bad_fork_cleanup_count:
@@ -1356,7 +1441,7 @@ task_t * __devinit fork_idle(int cpu)
 	task_t *task;
 	struct pt_regs regs;
 
-	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
+	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0, 0);
 	if (!task)
 		return ERR_PTR(-ENOMEM);
 	init_idle(task, cpu);
@@ -1386,26 +1471,35 @@ static inline int fork_traceflag (unsign
  * It copies the process, and if successful kick-starts
  * it and waits for it to finish using the VM if required.
  */
-long do_fork(unsigned long clone_flags,
+long do_fork_pid(unsigned long clone_flags,
 	      unsigned long stack_start,
 	      struct pt_regs *regs,
 	      unsigned long stack_size,
 	      int __user *parent_tidptr,
-	      int __user *child_tidptr)
+	      int __user *child_tidptr,
+	      long vpid)
 {
 	struct task_struct *p;
 	int trace = 0;
-	long pid = alloc_pidmap();
+	long pid;
 
-	if (pid < 0)
-		return -EAGAIN;
 	if (unlikely(current->ptrace)) {
 		trace = fork_traceflag (clone_flags);
 		if (trace)
 			clone_flags |= CLONE_PTRACE;
 	}
 
-	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+	pid = virtinfo_gencall(VIRTINFO_DOFORK, (void *)clone_flags);
+	if (pid)
+		return pid;
+
+	pid = alloc_pidmap();
+	if (pid < 0) {
+		pid = -EAGAIN;
+		goto out;
+	}
+
+	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid, vpid);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
@@ -1413,6 +1507,7 @@ long do_fork(unsigned long clone_flags,
 	if (!IS_ERR(p)) {
 		struct completion vfork;
 
+		pid = virt_pid(p);
 		if (clone_flags & CLONE_VFORK) {
 			task_aux(p)->vfork_done = &vfork;
 			init_completion(&vfork);
@@ -1426,6 +1521,7 @@ long do_fork(unsigned long clone_flags,
 			set_tsk_thread_flag(p, TIF_SIGPENDING);
 		}
 
+		virtinfo_gencall(VIRTINFO_DOFORKRET, p);
 		if (!(clone_flags & CLONE_STOPPED))
 			wake_up_new_task(p, clone_flags);
 		else
@@ -1434,21 +1530,43 @@ long do_fork(unsigned long clone_flags,
 
 		if (unlikely (trace)) {
 			current->ptrace_message = pid;
+			set_pn_state(current, PN_STOP_FORK);
 			ptrace_notify ((trace << 8) | SIGTRAP);
+			clear_pn_state(current);
 		}
 
 		if (clone_flags & CLONE_VFORK) {
 			wait_for_completion(&vfork);
-			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
+			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
+				set_pn_state(current, PN_STOP_VFORK);
 				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
+				clear_pn_state(current);
+			}
 		}
 	} else {
 		free_pidmap(pid);
 		pid = PTR_ERR(p);
 	}
+
+out:
+	virtinfo_gencall(VIRTINFO_DOFORKPOST, (void *)(long)pid);
 	return pid;
 }
 
+EXPORT_SYMBOL(do_fork_pid);
+
+long do_fork(unsigned long clone_flags,
+	      unsigned long stack_start,
+	      struct pt_regs *regs,
+	      unsigned long stack_size,
+	      int __user *parent_tidptr,
+	      int __user *child_tidptr)
+{
+	return do_fork_pid(clone_flags, stack_start, regs, stack_size,
+			   parent_tidptr, child_tidptr, 0);
+}
+
+
 /* SLAB cache for signal_struct structures (tsk->signal) */
 kmem_cache_t *signal_cachep;
 
@@ -1467,24 +1585,26 @@ kmem_cache_t *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 kmem_cache_t *mm_cachep;
 
+#include <linux/kmem_cache.h>
 void __init proc_caches_init(void)
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	signal_cachep = kmem_cache_create("signal_cache",
 			sizeof(struct signal_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	files_cachep = kmem_cache_create("files_cache", 
 			sizeof(struct files_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+	files_cachep->flags |= CFLGS_ENVIDS;
 	fs_cachep = kmem_cache_create("fs_cache", 
 			sizeof(struct fs_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	vm_area_cachep = kmem_cache_create("vm_area_struct",
 			sizeof(struct vm_area_struct), 0,
-			SLAB_PANIC, NULL, NULL);
+			SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	mm_cachep = kmem_cache_create("mm_struct",
 			sizeof(struct mm_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 }
diff -Nurap linux-2.6.9-100.orig/kernel/kmod.c linux-2.6.9-ve023stab054/kernel/kmod.c
--- linux-2.6.9-100.orig/kernel/kmod.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/kmod.c	2011-06-15 19:26:22.000000000 +0400
@@ -78,6 +78,10 @@ int request_module(const char *fmt, ...)
 #define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
 	static int kmod_loop_msg;
 
+	/* Don't allow request_module() inside VE. */
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	va_start(args, fmt);
 	ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
 	va_end(args);
@@ -115,29 +119,6 @@ int request_module(const char *fmt, ...)
 EXPORT_SYMBOL(request_module);
 #endif /* CONFIG_KMOD */
 
-#ifdef CONFIG_HOTPLUG
-/*
-	hotplug path is set via /proc/sys
-	invoked by hotplug-aware bus drivers,
-	with call_usermodehelper
-
-	argv [0] = hotplug_path;
-	argv [1] = "usb", "scsi", "pci", "network", etc;
-	... plus optional type-specific parameters
-	argv [n] = 0;
-
-	envp [*] = HOME, PATH; optional type-specific parameters
-
-	a hotplug bus should invoke this for device add/remove
-	events.  the command is expected to load drivers when
-	necessary, and may perform additional system setup.
-*/
-char hotplug_path[KMOD_PATH_LEN] = "/sbin/hotplug";
-
-EXPORT_SYMBOL(hotplug_path);
-
-#endif /* CONFIG_HOTPLUG */
-
 struct subprocess_info {
 	struct completion *complete;
 	char *path;
@@ -288,6 +269,9 @@ int call_usermodehelper(char *path, char
 	};
 	DECLARE_WORK(work, __call_usermodehelper, &sub_info);
 
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	if (!khelper_wq)
 		return -EBUSY;
 
diff -Nurap linux-2.6.9-100.orig/kernel/ksysfs.c linux-2.6.9-ve023stab054/kernel/ksysfs.c
--- linux-2.6.9-100.orig/kernel/ksysfs.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ksysfs.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,56 @@
+/*
+ * kernel/ksysfs.c - sysfs attributes in /sys/kernel, which
+ * 		     are not related to any other subsystem
+ *
+ * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
+ * 
+ * This file is released under the GPLv2
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#define KERNEL_ATTR_RO(_name) \
+static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
+
+#define KERNEL_ATTR_RW(_name) \
+static struct subsys_attribute _name##_attr = \
+	__ATTR(_name, 0644, _name##_show, _name##_store)
+
+#ifdef CONFIG_HOTPLUG
+static ssize_t hotplug_seqnum_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%llu\n", hotplug_seqnum);
+}
+KERNEL_ATTR_RO(hotplug_seqnum);
+#endif
+
+static decl_subsys(kernel, NULL, NULL);
+
+static struct attribute * kernel_attrs[] = {
+#ifdef CONFIG_HOTPLUG
+	&hotplug_seqnum_attr.attr,
+#endif
+	NULL
+};
+
+static struct attribute_group kernel_attr_group = {
+	.attrs = kernel_attrs,
+};
+
+static int __init ksysfs_init(void)
+{
+	int error = subsystem_register(&kernel_subsys);
+	if (!error)
+		error = sysfs_create_group(&kernel_subsys.kset.kobj,
+					   &kernel_attr_group);
+
+	return error;
+}
+
+core_initcall(ksysfs_init);
diff -Nurap linux-2.6.9-100.orig/kernel/kthread.c linux-2.6.9-ve023stab054/kernel/kthread.c
--- linux-2.6.9-100.orig/kernel/kthread.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/kthread.c	2011-06-15 19:26:19.000000000 +0400
@@ -114,7 +114,7 @@ static void keventd_create_kthread(void 
 		create->result = ERR_PTR(pid);
 	} else {
 		wait_for_completion(&create->started);
-		create->result = find_task_by_pid(pid);
+		create->result = find_task_by_pid_all(pid);
 	}
 	complete(&create->done);
 }
diff -Nurap linux-2.6.9-100.orig/kernel/module.c linux-2.6.9-ve023stab054/kernel/module.c
--- linux-2.6.9-100.orig/kernel/module.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/module.c	2011-06-15 19:26:22.000000000 +0400
@@ -1125,7 +1125,7 @@ static int mod_sysfs_setup(struct module
 		return -ENOMEM;
 
 	memset(&mod->mkobj->kobj, 0, sizeof(mod->mkobj->kobj));
-	err = kobject_set_name(&mod->mkobj->kobj, mod->name);
+	err = kobject_set_name(&mod->mkobj->kobj, "%s", mod->name);
 	if (err)
 		goto out;
 	kobj_set_kset_s(mod->mkobj, module_subsys);
@@ -2068,6 +2068,8 @@ static void *m_start(struct seq_file *m,
 	loff_t n = 0;
 
 	down(&module_mutex);
+	if (!ve_is_super(get_exec_env()))
+		return NULL;
 	list_for_each(i, &modules) {
 		if (n++ == *pos)
 			break;
diff -Nurap linux-2.6.9-100.orig/kernel/panic.c linux-2.6.9-ve023stab054/kernel/panic.c
--- linux-2.6.9-100.orig/kernel/panic.c	2011-06-09 19:22:59.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/panic.c	2011-06-15 19:26:22.000000000 +0400
@@ -21,10 +21,12 @@
 #include <linux/nmi.h>
 
 int panic_timeout;
-int panic_on_oops = 1;
+int panic_on_oops = 0;
 int panic_on_unrecovered_nmi;
 int tainted;
 int halt_on_dump_err = 0;
+int kernel_text_csum_broken;
+EXPORT_SYMBOL(kernel_text_csum_broken);
 
 EXPORT_SYMBOL(panic_timeout);
 EXPORT_SYMBOL_GPL(halt_on_dump_err);
@@ -84,6 +86,9 @@ NORET_TYPE void panic(const char * fmt, 
 
        notifier_call_chain(&panic_notifier_list, 0, buf);
 
+	/* avoid warnings from mdelay and many similar places */
+	preempt_count() = 1;
+
 	if (panic_timeout > 0 && !halt_on_dump_err)
 	{
 		/*
@@ -142,7 +147,8 @@ const char *print_tainted(void)
 {
 	static char buf[20];
 	if (tainted) {
-		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c",
+		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c",
+			kernel_text_csum_broken ? 'B' : ' ',
 			tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
 			tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
 			tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
diff -Nurap linux-2.6.9-100.orig/kernel/pid.c linux-2.6.9-ve023stab054/kernel/pid.c
--- linux-2.6.9-100.orig/kernel/pid.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/pid.c	2011-06-15 19:26:21.000000000 +0400
@@ -26,6 +26,10 @@
 #include <linux/bootmem.h>
 #include <linux/hash.h>
 
+#ifdef CONFIG_VE
+static void __free_vpid(int vpid, struct ve_struct *ve);
+#endif
+
 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
 static struct hlist_head *pid_hash[PIDTYPE_MAX];
 static int pidhash_shift;
@@ -55,8 +59,14 @@ typedef struct pidmap {
 	void *page;
 } pidmap_t;
 
+#ifdef CONFIG_VE
+#define PIDMAP_NRFREE (BITS_PER_PAGE/2)
+#else
+#define PIDMAP_NRFREE BITS_PER_PAGE
+#endif
+
 static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
-	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
+	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(PIDMAP_NRFREE), NULL } };
 
 static spinlock_t pidmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
@@ -70,9 +80,12 @@ fastcall void free_pidmap(int pid)
 	pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
 	int offset = pid & BITS_PER_PAGE_MASK;
 
+	BUG_ON(__is_virtual_pid(pid) || pid == 1);
+
 	clear_bit(offset, map->page);
 	atomic_inc(&map->nr_free);
 }
+EXPORT_SYMBOL(free_pidmap);
 
 int alloc_pidmap(void)
 {
@@ -80,6 +93,8 @@ int alloc_pidmap(void)
 	pidmap_t *map;
 
 	pid = last + 1;
+	if (__is_virtual_pid(pid))
+		pid += VPID_DIV;
 	if (pid >= pid_max)
 		pid = RESERVED_PIDS;
 	offset = pid & BITS_PER_PAGE_MASK;
@@ -109,6 +124,8 @@ int alloc_pidmap(void)
 					return pid;
 				}
 				offset = find_next_offset(map, offset);
+				if (__is_virtual_pid(offset))
+					offset += VPID_DIV;
 				pid = mk_pid(map, offset);
 			/*
 			 * find_next_offset() found a bit, the pid from it
@@ -133,23 +150,7 @@ int alloc_pidmap(void)
 	}
 	return -1;
 }
-
-static int next_pidmap(int last)
-{
-	int offset;
-	pidmap_t *map;
-
-	offset = (last + 1) & BITS_PER_PAGE_MASK;
-	map = &pidmap_array[(last + 1)/BITS_PER_PAGE];
-	for (; map < &pidmap_array[PIDMAP_ENTRIES]; map++, offset = 0) {
-		if (unlikely(!map->page))
-			continue;
-		offset = find_next_bit(map->page, BITS_PER_PAGE, offset);
-		if (offset < BITS_PER_PAGE)
-			return mk_pid(map, offset);
-	}
-	return -1;
-}
+EXPORT_SYMBOL(alloc_pidmap);
 
 struct pid * fastcall find_pid(enum pid_type type, int nr)
 {
@@ -163,6 +164,7 @@ struct pid * fastcall find_pid(enum pid_
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(find_pid);
 
 int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 {
@@ -182,16 +184,20 @@ int fastcall attach_pid(task_t *task, en
 
 	return 0;
 }
+EXPORT_SYMBOL(attach_pid);
 
-static inline int __detach_pid(task_t *task, enum pid_type type)
+static fastcall int __detach_pid(task_t *task, enum pid_type type)
 {
 	struct pid *pid, *pid_next;
-	int nr;
+	int nr = 0;
 
 	pid = &task->pids[type];
 	if (!hlist_unhashed(&pid->pid_chain)) {
 		hlist_del(&pid->pid_chain);
-		if (!list_empty(&pid->pid_list)) {
+
+		if (list_empty(&pid->pid_list))
+			nr = pid->nr;
+		else {
 			pid_next = list_entry(pid->pid_list.next,
 						struct pid, pid_list);
 			/* insert next pid from pid_list to hash */
@@ -199,8 +205,8 @@ static inline int __detach_pid(task_t *t
 				&pid_hash[type][pid_hashfn(pid_next->nr)]);
 		}
 	}
+
 	list_del(&pid->pid_list);
-	nr = pid->nr;
 	pid->nr = 0;
 
 	return nr;
@@ -208,22 +214,38 @@ static inline int __detach_pid(task_t *t
 
 void fastcall detach_pid(task_t *task, enum pid_type type)
 {
+	int i;
 	int nr;
 
 	nr = __detach_pid(task, type);
 	if (!nr)
 		return;
 
-	for (type = 0; type < PIDTYPE_MAX; ++type)
-		if (find_pid(type, nr))
+	for (i = 0; i < PIDTYPE_MAX; ++i)
+		if (find_pid(i, nr))
 			return;
+
+#ifdef CONFIG_VE
+	__free_vpid(task->pids[type].vnr, VE_TASK_INFO(task)->owner_env);
+#endif
 	free_pidmap(nr);
 }
+EXPORT_SYMBOL(detach_pid);
 
 task_t *find_task_by_pid_type(int type, int nr)
 {
+	BUG();
+	return NULL;
+}
+
+EXPORT_SYMBOL(find_task_by_pid_type);
+
+task_t *find_task_by_pid_type_all(int type, int nr)
+{
 	struct pid *pid;
 
+	BUG_ON(nr != -1 && is_virtual_pid(nr));
+
 	pid = find_pid(type, nr);
 	if (!pid)
 		return NULL;
@@ -231,7 +253,35 @@ task_t *find_task_by_pid_type(int type, 
 	return pid_task(&pid->pid_list, type);
 }
 
-EXPORT_SYMBOL(find_task_by_pid_type);
+EXPORT_SYMBOL(find_task_by_pid_type_all);
+
+#ifdef CONFIG_VE
+
+task_t *find_task_by_pid_type_ve(int type, int nr)
+{
+	task_t *tsk;
+	int gnr = nr;
+	struct pid *pid;
+
+	if (is_virtual_pid(nr)) {
+		gnr = __vpid_to_pid(nr);
+		if (unlikely(gnr == -1))
+			return NULL;
+	}
+
+	pid = find_pid(type, gnr);
+	if (!pid)
+		return NULL;
+
+	tsk = pid_task(&pid->pid_list, type);
+	if (!ve_accessible(VE_TASK_INFO(tsk)->owner_env, get_exec_env()))
+		return NULL;
+	return tsk;
+}
+
+EXPORT_SYMBOL(find_task_by_pid_type_ve);
+
+#endif
 
 /*
  * This function switches the PIDs if a non-leader thread calls
@@ -250,12 +300,16 @@ void switch_exec_pids(task_t *leader, ta
 
 	leader->pid = leader->tgid = thread->pid;
 	thread->pid = thread->tgid;
+	set_virt_tgid(leader, virt_pid(thread));
+	set_virt_pid(leader, virt_pid(thread));
+	set_virt_pid(thread, virt_tgid(thread));
 
 	attach_pid(thread, PIDTYPE_PID, thread->pid);
 	attach_pid(thread, PIDTYPE_TGID, thread->tgid);
 	attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
 	attach_pid(thread, PIDTYPE_SID, thread->signal->session);
 	list_add_tail(&thread->tasks, &init_task.tasks);
+	SET_VE_LINKS(thread);
 
 	attach_pid(leader, PIDTYPE_PID, leader->pid);
 	attach_pid(leader, PIDTYPE_TGID, leader->tgid);
@@ -263,40 +317,370 @@ void switch_exec_pids(task_t *leader, ta
 	attach_pid(leader, PIDTYPE_SID, leader->signal->session);
 }
 
-/**
- * pid_alive - check that a task structure is not stale
- * @p: Task structure to be checked.
+#ifdef CONFIG_VE
+
+/* Virtual PID bits.
  *
- * Test if a process is not yet dead (at most zombie state)
- * If pid_alive fails, then pointers within the task structure
- * can be stale and must not be dereferenced.
+ * At the moment all internal kernel structures store the real global pid.
+ * The only place where virtual PIDs are used is the user frontend. We
+ * remap virtual pids obtained from userspace to global ones (vpid_to_pid)
+ * and map globals to virtuals before showing them to the user (virt_pid_type).
+ *
+ * We hold virtual PIDs inside struct pid, so map global -> virtual is easy.
  */
-int pid_alive(struct task_struct *p)
+
+pid_t _pid_type_to_vpid(int type, pid_t pid)
 {
-	return p->pids[PIDTYPE_PID].nr != 0;
+	struct pid * p;
+
+	if (unlikely(is_virtual_pid(pid)))
+		return -1;
+
+	read_lock(&tasklist_lock);
+	p = find_pid(type, pid);
+	if (p) {
+		pid = p->vnr;
+	} else {
+		pid = -1;
+	}
+	read_unlock(&tasklist_lock);
+	return pid;
 }
+EXPORT_SYMBOL(_pid_type_to_vpid);
 
-/*
- * Used by proc to find the first pid that is greater then or equal to nr.
+pid_t pid_type_to_vpid(int type, pid_t pid)
+{
+	int vpid;
+
+	if (unlikely(pid <= 0))
+		return pid;
+
+	BUG_ON(is_virtual_pid(pid));
+
+	if (ve_is_super(get_exec_env()))
+		return pid;
+
+	vpid = _pid_type_to_vpid(type, pid);
+	if (unlikely(vpid == -1)) {
+		/* This is allowed: a global pid can be used everywhere.
+		 * It can happen when the kernel remembers stray pids in
+		 * signal queues, locks etc.
+		 */
+		vpid = pid;
+	}
+	return vpid;
+}
+EXPORT_SYMBOL(pid_type_to_vpid);
+
+/* To map virtual pids to global we maintain special hash table.
  *
- * If there is a pid at nr this function is exactly the same as find_pid.
+ * Mapping entries are allocated when a process with non-trivial
+ * mapping is forked, which is possible only after VE migrated.
+ * Mappings are destroyed, when a global pid is removed from global
+ * pidmap, which means we do not need to refcount mappings.
  */
-struct pid *find_ge_pid(int nr)
+
+static struct hlist_head *vpid_hash;
+
+struct vpid_mapping
 {
-	struct pid *pid;
+	int	vpid;
+	int	veid;
+	int	pid;
+	struct hlist_node link;
+};
+
+static kmem_cache_t *vpid_mapping_cachep;
+
+static inline int vpid_hashfn(int vnr, int veid)
+{
+	return hash_long((unsigned long)(vnr+(veid<<16)), pidhash_shift);
+}
 
-	if (nr == 0)
-		nr = 1;
+struct vpid_mapping *__lookup_vpid_mapping(int vnr, int veid)
+{
+	struct hlist_node *elem;
+	struct vpid_mapping *map;
+
+	hlist_for_each_entry(map, elem,
+			&vpid_hash[vpid_hashfn(vnr, veid)], link) {
+		if (map->vpid == vnr && map->veid == veid)
+			return map;
+	}
+	return NULL;
+}
+
+/* __vpid_to_pid() is raw version of vpid_to_pid(). It is to be used
+ * only under tasklist_lock. In some places we must use only this version
+ * (f.e. __kill_pg_info is called under write lock!)
+ *
+ * Caller should pass virtual pid. This function returns an error, when
+ * seeing a global pid.
+ */
+int __vpid_to_pid(int pid)
+{
+	struct vpid_mapping *map;
 
-	do {
-		pid = find_pid(PIDTYPE_PID, nr);
-		if (pid)
-			break;
-		nr = next_pidmap(nr);
-	} while (nr > 0);
+	if (unlikely(!is_virtual_pid(pid) || ve_is_super(get_exec_env())))
+		return -1;
 
+	if (!get_exec_env()->sparse_vpid) {
+		int init_pid;
+
+		init_pid = get_exec_env()->init_entry->pid;
+		if (pid == 1)
+			return init_pid;
+		if (pid == init_pid + VPID_DIV)
+			return -1; /* vpid of init is 1 */
+		return pid - VPID_DIV;
+	}
+
+	map = __lookup_vpid_mapping(pid, VEID(get_exec_env()));
+	if (map)
+		return map->pid;
+	return -1;
+}
+EXPORT_SYMBOL(__vpid_to_pid);
+
+int vpid_to_pid(int pid)
+{
+	/* User gave bad pid. It is his problem. */
+	if (unlikely(pid <= 0))
+		return pid;
+
+	if (!is_virtual_pid(pid))
+		return pid;
+
+	read_lock(&tasklist_lock);
+	pid = __vpid_to_pid(pid);
+	read_unlock(&tasklist_lock);
 	return pid;
 }
+EXPORT_SYMBOL(vpid_to_pid);
+
+/* VEs which never migrated have trivial "arithmetic" mapping pid <-> vpid:
+ *
+ * vpid == 1 -> ve->init_task->pid
+ * else	        pid & ~VPID_DIV
+ *
+ * In this case VE has ve->sparse_vpid = 0 and we do not use vpid hash table.
+ *
+ * When VE migrates and we see non-trivial mapping the first time, we
+ * scan process table and populate mapping hash table.
+ */
+
+static int add_mapping(int pid, int vpid, int veid, struct hlist_head *cache)
+{
+        if (unlikely(pid <= 0 || vpid <= 0))
+		return 0;
+
+	/* VE can contain non-virtual (VE_ENTER'ed) processes when
+	 * switching to sparse mapping. We should not create mappings
+	 * for them. */
+	if (unlikely(!__is_virtual_pid(vpid) && vpid != 1)) {
+		printk("DEBUG (do not worry, but report): non-virtual pid while switching mode %d %d\n", pid, vpid);
+		return 0;
+	}
+
+	if (!__lookup_vpid_mapping(vpid, veid)) {
+		struct vpid_mapping *m;
+		if (hlist_empty(cache)) {
+			m = kmem_cache_alloc(vpid_mapping_cachep, GFP_ATOMIC);
+			if (unlikely(m == NULL))
+				return -ENOMEM;
+		} else {
+			m = hlist_entry(cache->first, struct vpid_mapping, link);
+			hlist_del(&m->link);
+		}
+		m->pid = pid;
+		m->vpid = vpid;
+		m->veid = veid;
+		hlist_add_head(&m->link,
+			       &vpid_hash[vpid_hashfn(vpid, veid)]);
+	}
+	return 0;
+}
+
+static int switch_to_sparse_mapping(int pid)
+{
+	struct ve_struct *env = get_exec_env();
+	struct hlist_head cache;
+	task_t *g, *t;
+	int pcount;
+	int err;
+
+	/* The transition happens under write_lock_irq, so we try to make
+	 * it more reliable and fast by preallocating mapping entries.
+	 * pcounter may not be enough: we could have lots of orphaned
+	 * process groups and sessions, which also require mappings.
+	 */
+	INIT_HLIST_HEAD(&cache);
+	pcount = atomic_read(&env->pcounter);
+	err = -ENOMEM;
+	while (pcount > 0) {
+		struct vpid_mapping *m;
+		m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL);
+		if (!m)
+			goto out;
+		hlist_add_head(&m->link, &cache);
+		pcount--;
+	}
+
+	write_lock_irq(&tasklist_lock);
+	err = 0;
+	if (env->sparse_vpid)
+		goto out_unlock;
+
+	err = -ENOMEM;
+	do_each_thread_ve(g, t) {
+		if (t->pid == pid)
+			continue;
+		if (add_mapping(t->pid, virt_pid(t), VEID(env), &cache))
+			goto out_unlock;
+	} while_each_thread_ve(g, t);
+
+	for_each_process_ve(t) {
+		if (t->pid == pid)
+			continue;
+
+		if (add_mapping(t->tgid, virt_tgid(t), VEID(env), &cache))
+			goto out_unlock;
+		if (add_mapping(t->signal->pgrp, virt_pgid(t), VEID(env), &cache))
+			goto out_unlock;
+		if (add_mapping(t->signal->session, virt_sid(t), VEID(env), &cache))
+			goto out_unlock;
+	}
+	env->sparse_vpid = 1;
+	err = 0;
+
+out_unlock:
+	if (err) {
+		int i;
+
+		for (i=0; i<(1<<pidhash_shift); i++) {
+			struct hlist_node *elem, *next;
+			struct vpid_mapping *map;
+
+			hlist_for_each_entry_safe(map, elem, next, &vpid_hash[i], link) {
+				if (map->veid == VEID(env)) {
+					hlist_del(elem);
+					hlist_add_head(elem, &cache);
+				}
+			}
+		}
+	}
+	write_unlock_irq(&tasklist_lock);
+
+out:
+	while (!hlist_empty(&cache)) {
+		struct vpid_mapping *m;
+		m = hlist_entry(cache.first, struct vpid_mapping, link);
+		hlist_del(&m->link);
+		kmem_cache_free(vpid_mapping_cachep, m);
+	}
+	return err;
+}
+
+int alloc_vpid(int pid, int virt_pid)
+{
+	int result;
+	struct vpid_mapping *m;
+	struct ve_struct *env = get_exec_env();
+
+	if (ve_is_super(env) || !env->virt_pids)
+		return pid;
+
+	if (!env->sparse_vpid) {
+		if (virt_pid == -1)
+			return pid + VPID_DIV;
+
+		if (virt_pid == 1 || virt_pid == pid + VPID_DIV)
+			return virt_pid;
+
+		if ((result = switch_to_sparse_mapping(pid)) < 0)
+			return result;
+	}
+
+	m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL);
+	if (!m)
+		return -ENOMEM;
+
+	m->pid = pid;
+	m->veid = VEID(env);
+
+	result = (virt_pid == -1) ? pid + VPID_DIV : virt_pid;
+
+	write_lock_irq(&tasklist_lock);
+	if (unlikely(__lookup_vpid_mapping(result, m->veid))) {
+		if (virt_pid > 0) {
+			result = -EEXIST;
+			goto out;
+		}
+
+		/* No luck. Search for a vpid that does not exist yet.
+		 * This is a weak spot: we do a linear search. */
+		do {
+			result++;
+			if (!__is_virtual_pid(result))
+				result += VPID_DIV;
+			if (result >= pid_max)
+				result = RESERVED_PIDS + VPID_DIV;
+		} while (__lookup_vpid_mapping(result, m->veid) != NULL);
+
+		/* Also set last_pid in the hope that future alloc_pidmap()
+		 * calls will avoid collisions with this vpid */
+		last_pid = result - VPID_DIV;
+	}
+	if (result > 0) {
+		m->vpid = result;
+		hlist_add_head(&m->link,
+			       &vpid_hash[vpid_hashfn(result, m->veid)]);
+	}
+out:
+	write_unlock_irq(&tasklist_lock);
+	if (result < 0)
+		kmem_cache_free(vpid_mapping_cachep, m);
+	return result;
+}
+EXPORT_SYMBOL(alloc_vpid);
+
+static void __free_vpid(int vpid, struct ve_struct *ve)
+{
+	struct vpid_mapping *m;
+
+	if (!ve->sparse_vpid)
+		return;
+
+	if (!__is_virtual_pid(vpid) && (vpid != 1 || ve_is_super(ve)))
+		return;
+
+	m = __lookup_vpid_mapping(vpid, ve->veid);
+	BUG_ON(m == NULL);
+	hlist_del(&m->link);
+	kmem_cache_free(vpid_mapping_cachep, m);
+}
+
+void free_vpid(int vpid, struct ve_struct *ve)
+{
+	write_lock_irq(&tasklist_lock);
+	__free_vpid(vpid, ve);
+	write_unlock_irq(&tasklist_lock);
+}
+EXPORT_SYMBOL(free_vpid);
+#endif
+
+/**
+ * pid_alive - check that a task structure is not stale
+ * @p: Task structure to be checked.
+ *
+ * Test if a process is not yet dead (at most zombie state)
+ * If pid_alive fails, then pointers within the task structure
+ * can be stale and must not be dereferenced.
+ */
+int pid_alive(struct task_struct *p)
+{
+	return p->pids[PIDTYPE_PID].nr != 0;
+}
 
 /*
  * The pid hash table is scaled according to the amount of memory in the
@@ -324,6 +708,14 @@ void __init pidhash_init(void)
 		for (j = 0; j < pidhash_size; j++)
 			INIT_HLIST_HEAD(&pid_hash[i][j]);
 	}
+
+#ifdef CONFIG_VE
+	vpid_hash = alloc_bootmem(pidhash_size * sizeof(struct hlist_head));
+	if (!vpid_hash)
+		panic("Could not alloc vpid_hash!\n");
+	for (j = 0; j < pidhash_size; j++)
+		INIT_HLIST_HEAD(&vpid_hash[j]);
+#endif
 }
 
 void __init pidmap_init(void)
@@ -340,4 +732,12 @@ void __init pidmap_init(void)
 
 	for (i = 0; i < PIDTYPE_MAX; i++)
 		attach_pid(current, i, 0);
+
+#ifdef CONFIG_VE
+	vpid_mapping_cachep =
+		kmem_cache_create("vpid_mapping",
+				  sizeof(struct vpid_mapping),
+				  __alignof__(struct vpid_mapping),
+				  SLAB_PANIC|SLAB_UBC, NULL, NULL);
+#endif
 }
diff -Nurap linux-2.6.9-100.orig/kernel/posix-timers.c linux-2.6.9-ve023stab054/kernel/posix-timers.c
--- linux-2.6.9-100.orig/kernel/posix-timers.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/posix-timers.c	2011-06-15 19:26:19.000000000 +0400
@@ -31,6 +31,7 @@
  * POSIX clocks & timers
  */
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
@@ -223,7 +224,8 @@ static __init int init_posix_timers(void
 	register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
 
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
-					sizeof (struct k_itimer), 0, 0, NULL, NULL);
+					sizeof (struct k_itimer), 0, SLAB_UBC,
+					NULL, NULL);
 	idr_init(&posix_timers_id);
 	return 0;
 }
@@ -394,6 +396,11 @@ exit:
 static void timer_notify_task(struct k_itimer *timr)
 {
 	int ret;
+	struct ve_struct *old_ve;
+	struct user_beancounter *old_ub;
+	
+	old_ve = set_exec_env(VE_TASK_INFO(timr->it_process)->owner_env);
+	old_ub = set_exec_ub(task_bc(timr->it_process)->task_ub);
 
 	memset(&timr->sigq->info, 0, sizeof(siginfo_t));
 
@@ -440,6 +447,9 @@ static void timer_notify_task(struct k_i
 		 */
 		schedule_next_timer(timr);
 	}
+
+	(void)set_exec_ub(old_ub);
+	(void)set_exec_env(old_ve);
 }
 
 /*
@@ -499,7 +509,7 @@ static inline struct task_struct * good_
 	struct task_struct *rtn = current->group_leader;
 
 	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
-		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
+		(!(rtn = find_task_by_pid_ve(event->sigev_notify_thread_id)) ||
 		 rtn->tgid != current->tgid ||
 		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
 		return NULL;
@@ -1212,6 +1222,7 @@ int do_posix_clock_monotonic_gettime(str
 
 	return 0;
 }
+EXPORT_SYMBOL(do_posix_clock_monotonic_gettime);
 
 int do_posix_clock_monotonic_settime(struct timespec *tp)
 {
diff -Nurap linux-2.6.9-100.orig/kernel/power/process.c linux-2.6.9-ve023stab054/kernel/power/process.c
--- linux-2.6.9-100.orig/kernel/power/process.c	2011-06-09 19:22:45.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/power/process.c	2011-06-15 19:26:21.000000000 +0400
@@ -18,6 +18,7 @@
  */
 #define TIMEOUT	(6 * HZ)
 
+static atomic_t global_suspend = ATOMIC_INIT(0);
 
 static inline int freezeable(struct task_struct * p)
 {
@@ -32,7 +33,7 @@ static inline int freezeable(struct task
 }
 
 /* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(unsigned long flag)
+void refrigerator()
 {
 	/* Hmm, should we be allowed to suspend when there are realtime
 	   processes around? */
@@ -40,14 +41,24 @@ void refrigerator(unsigned long flag)
 	save = current->state;
 	current->state = TASK_UNINTERRUPTIBLE;
 	pr_debug("%s entered refrigerator\n", current->comm);
-	printk("=");
-	current->flags &= ~PF_FREEZE;
+	/* printk("="); */
 
 	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* We sent fake signal, clean it up */
+	if (test_and_clear_thread_flag(TIF_FREEZE)) {
+		recalc_sigpending(); /* We sent fake signal, clean it up */
+		if (atomic_read(&global_suspend) ||
+		    atomic_read(&get_exec_env()->suspend)) {
+			current->flags |= PF_FROZEN;
+		} else {
+			current->state = save;
+		}
+	} else {
+		/* Freeze request could be canceled before we entered
+		 * refrigerator(). In this case we do nothing. */
+		current->state = save;
+	}
 	spin_unlock_irq(&current->sighand->siglock);
 
-	current->flags |= PF_FROZEN;
 	while (current->flags & PF_FROZEN)
 		schedule();
 	pr_debug("%s left refrigerator\n", current->comm);
@@ -62,11 +73,12 @@ int freeze_processes(void)
 	struct task_struct *g, *p;
 	
 	printk( "Stopping tasks: " );
+	atomic_inc(&global_suspend);
 	start_time = jiffies;
 	do {
 		todo = 0;
 		read_lock(&tasklist_lock);
-		do_each_thread(g, p) {
+		do_each_thread_all(g, p) {
 			unsigned long flags;
 			if (!freezeable(p))
 				continue;
@@ -77,22 +89,25 @@ int freeze_processes(void)
 
 			/* FIXME: smp problem here: we may not access other process' flags
 			   without locking */
-			p->flags |= PF_FREEZE;
 			spin_lock_irqsave(&p->sighand->siglock, flags);
+			set_tsk_thread_flag(p, TIF_FREEZE);
 			signal_wake_up(p, 0);
 			spin_unlock_irqrestore(&p->sighand->siglock, flags);
 			todo++;
-		} while_each_thread(g, p);
+		} while_each_thread_all(g, p);
 		read_unlock(&tasklist_lock);
 		yield();			/* Yield is okay here */
 		if (time_after(jiffies, start_time + TIMEOUT)) {
 			printk( "\n" );
 			printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
+			atomic_dec(&global_suspend);
 			return todo;
 		}
 	} while(todo);
-	
-	printk( "|\n" );
+
+	atomic_dec(&global_suspend);
+
+	/* printk( "|\n" ); */
 	BUG_ON(in_atomic());
 	return 0;
 }
@@ -103,15 +118,18 @@ void thaw_processes(void)
 
 	printk( "Restarting tasks..." );
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
+		unsigned long flags;
 		if (!freezeable(p))
 			continue;
+		spin_lock_irqsave(&p->sighand->siglock, flags);
 		if (p->flags & PF_FROZEN) {
 			p->flags &= ~PF_FROZEN;
 			wake_up_process(p);
 		} else
 			printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
-	} while_each_thread(g, p);
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	} while_each_thread_all(g, p);
 
 	read_unlock(&tasklist_lock);
 	schedule();
diff -Nurap linux-2.6.9-100.orig/kernel/power/swsusp.c linux-2.6.9-ve023stab054/kernel/power/swsusp.c
--- linux-2.6.9-100.orig/kernel/power/swsusp.c	2004-10-19 01:53:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/power/swsusp.c	2011-06-15 19:26:19.000000000 +0400
@@ -246,7 +246,7 @@ static int write_page(unsigned long addr
 	swp_entry_t entry;
 	int error = 0;
 
-	entry = get_swap_page();
+	entry = get_swap_page(mm_ub(&init_mm));
 	if (swp_offset(entry) && 
 	    swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) {
 		error = rw_swap_page_sync(WRITE, entry,
diff -Nurap linux-2.6.9-100.orig/kernel/printk.c linux-2.6.9-ve023stab054/kernel/printk.c
--- linux-2.6.9-100.orig/kernel/printk.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/printk.c	2011-06-15 19:26:22.000000000 +0400
@@ -26,10 +26,13 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>			/* For in_interrupt() */
 #include <linux/config.h>
+#include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/smp.h>
 #include <linux/security.h>
 #include <linux/bootmem.h>
+#include <linux/vzratelimit.h>
+#include <linux/veprintk.h>
 
 #include <asm/uaccess.h>
 
@@ -53,8 +56,12 @@ int console_printk[4] = {
 
 EXPORT_SYMBOL(console_printk);
 
+int console_silence_loglevel;
 int oops_in_progress;
 
+struct printk_aligned printk_no_wake_var[NR_CPUS];
+EXPORT_SYMBOL(printk_no_wake_var);
+
 /*
  * console_sem protects the console_drivers list, and also
  * provides serialisation for access to the entire console
@@ -77,7 +84,7 @@ static int console_locked;
  * It is also used in interesting ways to provide interlocking in
  * release_console_sem().
  */
-static spinlock_t logbuf_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t logbuf_lock = SPIN_LOCK_UNLOCKED;
 
 static char __log_buf[__LOG_BUF_LEN];
 static char *log_buf = __log_buf;
@@ -151,6 +158,43 @@ static int __init console_setup(char *st
 
 __setup("console=", console_setup);
 
+static int __init setup_console_silencelevel(char *str)
+{
+	int level;
+
+	if (get_option(&str, &level) != 1)
+		return 0;
+
+	console_silence_loglevel = level;
+	return 1;
+}
+
+__setup("silencelevel=", setup_console_silencelevel);
+
+static inline int ve_log_init(void)
+{
+#ifdef CONFIG_VE
+	if (ve_log_buf != NULL)
+		return 0;
+
+	if (ve_is_super(get_exec_env())) {
+		ve0._log_wait = &log_wait;
+		ve0._log_start = &log_start;
+		ve0._log_end = &log_end;
+		ve0._logged_chars = &logged_chars;
+		ve0.log_buf = log_buf;
+		return 0;
+	}
+
+	ve_log_buf = kmalloc(ve_log_buf_len, GFP_ATOMIC);
+	if (!ve_log_buf)
+		return -ENOMEM;
+
+	memset(ve_log_buf, 0, ve_log_buf_len);
+#endif
+	return 0;
+}
+
 /**
  * add_preferred_console - add a device to the list of preferred consoles.
  *
@@ -257,6 +301,10 @@ int do_syslog(int type, char __user * bu
 	char c;
 	int error = 0;
 
+	if (!ve_is_super(get_exec_env()) &&
+			(type == 6 || type == 7 || type == 8))
+		goto out;
+
 	error = security_syslog(type);
 	if (error)
 		return error;
@@ -276,14 +324,15 @@ int do_syslog(int type, char __user * bu
 		error = verify_area(VERIFY_WRITE,buf,len);
 		if (error)
 			goto out;
-		error = wait_event_interruptible(log_wait, (log_start - log_end));
+		error = wait_event_interruptible(ve_log_wait,
+					(ve_log_start - ve_log_end));
 		if (error)
 			goto out;
 		i = 0;
 		spin_lock_irq(&logbuf_lock);
-		while (!error && (log_start != log_end) && i < len) {
-			c = LOG_BUF(log_start);
-			log_start++;
+		while (!error && (ve_log_start != ve_log_end) && i < len) {
+			c = VE_LOG_BUF(ve_log_start);
+			ve_log_start++;
 			spin_unlock_irq(&logbuf_lock);
 			error = __put_user(c,buf);
 			buf++;
@@ -308,15 +357,17 @@ int do_syslog(int type, char __user * bu
 		error = verify_area(VERIFY_WRITE,buf,len);
 		if (error)
 			goto out;
+		if (ve_log_buf == NULL)
+			goto out;
 		count = len;
-		if (count > log_buf_len)
-			count = log_buf_len;
+		if (count > ve_log_buf_len)
+			count = ve_log_buf_len;
 		spin_lock_irq(&logbuf_lock);
-		if (count > logged_chars)
-			count = logged_chars;
+		if (count > ve_logged_chars)
+			count = ve_logged_chars;
 		if (do_clear)
-			logged_chars = 0;
-		limit = log_end;
+			ve_logged_chars = 0;
+		limit = ve_log_end;
 		/*
 		 * __put_user() could sleep, and while we sleep
 		 * printk() could overwrite the messages 
@@ -325,9 +376,9 @@ int do_syslog(int type, char __user * bu
 		 */
 		for(i = 0; i < count && !error; i++) {
 			j = limit-1-i;
-			if (j + log_buf_len < log_end)
+			if (j + ve_log_buf_len < ve_log_end)
 				break;
-			c = LOG_BUF(j);
+			c = VE_LOG_BUF(j);
 			spin_unlock_irq(&logbuf_lock);
 			error = __put_user(c,&buf[count-1-i]);
 			cond_resched();
@@ -351,7 +402,7 @@ int do_syslog(int type, char __user * bu
 		}
 		break;
 	case 5:		/* Clear ring buffer */
-		logged_chars = 0;
+		ve_logged_chars = 0;
 		break;
 	case 6:		/* Disable logging to console */
 		console_loglevel = minimum_console_loglevel;
@@ -369,10 +420,10 @@ int do_syslog(int type, char __user * bu
 		error = 0;
 		break;
 	case 9:		/* Number of chars in the log buffer */
-		error = log_end - log_start;
+		error = ve_log_end - ve_log_start;
 		break;
 	case 10:	/* Size of the log buffer */
-		error = log_buf_len;
+		error = ve_log_buf_len;
 		break;
 	default:
 		error = -EINVAL;
@@ -486,14 +537,14 @@ static void call_console_drivers(unsigne
 
 static void emit_log_char(char c)
 {
-	LOG_BUF(log_end) = c;
-	log_end++;
-	if (log_end - log_start > log_buf_len)
-		log_start = log_end - log_buf_len;
-	if (log_end - con_start > log_buf_len)
+	VE_LOG_BUF(ve_log_end) = c;
+	ve_log_end++;
+	if (ve_log_end - ve_log_start > ve_log_buf_len)
+		ve_log_start = ve_log_end - ve_log_buf_len;
+	if (ve_is_super(get_exec_env()) && log_end - con_start > log_buf_len)
 		con_start = log_end - log_buf_len;
-	if (logged_chars < log_buf_len)
-		logged_chars++;
+	if (ve_logged_chars < ve_log_buf_len)
+		ve_logged_chars++;
 }
 
 /*
@@ -517,6 +568,8 @@ static void zap_locks(void)
 	init_MUTEX(&console_sem);
 }
 
+static unsigned long do_release_console_sem(unsigned long *flags);
+
 /*
  * This is printk.  It can be called from any context.  We want it to work.
  * 
@@ -530,25 +583,14 @@ static void zap_locks(void)
  * then changes console_loglevel may break. This is because console_loglevel
  * is inspected when the actual printing occurs.
  */
-asmlinkage int printk(const char *fmt, ...)
-{
-	va_list args;
-	int r;
-
-	va_start(args, fmt);
-	r = vprintk(fmt, args);
-	va_end(args);
-
-	return r;
-}
-
-asmlinkage int vprintk(const char *fmt, va_list args)
+asmlinkage int ve_vprintk(const char *fmt, va_list args)
 {
 	unsigned long flags;
 	int printed_len;
 	char *p;
 	static char printk_buf[1024];
 	static int log_level_unknown = 1;
+	int err, need_wake;
 
 	if (unlikely(oops_in_progress))
 		zap_locks();
@@ -556,6 +598,12 @@ asmlinkage int vprintk(const char *fmt, 
 	/* This stops the holder of console_sem just where we want him */
 	spin_lock_irqsave(&logbuf_lock, flags);
 
+	err = ve_log_init();
+	if (err) {
+		spin_unlock_irqrestore(&logbuf_lock, flags);
+		return err;
+	}
+
 	/* Emit the output into the temporary buffer */
 	printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
 
@@ -594,7 +642,26 @@ asmlinkage int vprintk(const char *fmt, 
 		spin_unlock_irqrestore(&logbuf_lock, flags);
 		goto out;
 	}
-	if (!down_trylock(&console_sem)) {
+	if (!ve_is_super(get_exec_env())) {
+		need_wake = (ve_log_start != ve_log_end);
+		spin_unlock_irqrestore(&logbuf_lock, flags);
+		if (!oops_in_progress && need_wake)
+			wake_up_interruptible(&ve_log_wait);
+	} else if (__printk_no_wake) {
+		/*
+		 * A difficult case, created by the console semaphore mess...
+		 * All wakeups are omitted.
+		 */
+		if (!atomic_add_negative(-1, &console_sem.count)) {
+			console_locked = 1;
+			console_may_schedule = 0;
+			do_release_console_sem(&flags);
+			console_locked = 0;
+			console_may_schedule = 0;
+		}
+		atomic_inc(&console_sem.count);
+		spin_unlock_irqrestore(&logbuf_lock, flags);
+	} else if (!down_trylock(&console_sem)) {
 		console_locked = 1;
 		/*
 		 * We own the drivers.  We can drop the spinlock and let
@@ -614,8 +681,74 @@ asmlinkage int vprintk(const char *fmt, 
 out:
 	return printed_len;
 }
+
+asmlinkage int vprintk(const char *fmt, va_list args)
+{
+	int r;
+	struct ve_struct *env;
+
+	env = set_exec_env(get_ve0());
+	r = ve_vprintk(fmt, args);
+	set_exec_env(env);
+
+	return r;
+}
+
+static struct timer_list conswakeup_timer;
+static void conswakeup_timer_call(unsigned long dumy)
+{
+	if (!down_trylock(&console_sem)) {
+		console_locked = 1;
+		console_may_schedule = 0;
+		release_console_sem();
+	}
+	mod_timer(&conswakeup_timer, jiffies + 5 * HZ);
+}
+
+static int __init conswakeup_init(void)
+{
+	init_timer(&conswakeup_timer);
+	conswakeup_timer.function = &conswakeup_timer_call;
+	conswakeup_timer.expires = jiffies + 5 * HZ;
+	add_timer(&conswakeup_timer);
+	return 0;
+}
+console_initcall(conswakeup_init);
+
+asmlinkage int printk(const char *fmt, ...)
+{
+	va_list args;
+	int r;
+
+	va_start(args, fmt);
+	r = vprintk(fmt, args);
+	va_end(args);
+
+	return r;
+}
+
+asmlinkage int ve_printk(int dst, const char *fmt, ...)
+{
+	va_list args;
+	int printed_len;
+
+	printed_len = 0;
+	if (ve_is_super(get_exec_env()) || (dst & VE0_LOG)) {
+		va_start(args, fmt);
+		printed_len = vprintk(fmt, args);
+		va_end(args);
+	}
+	if (!ve_is_super(get_exec_env()) && (dst & VE_LOG)) {
+		va_start(args, fmt);
+		printed_len = ve_vprintk(fmt, args);
+		va_end(args);
+	}
+	return printed_len;
+}
+
 EXPORT_SYMBOL(printk);
 EXPORT_SYMBOL(vprintk);
+EXPORT_SYMBOL(ve_printk);
 
 /**
  * acquire_console_sem - lock the console system for exclusive use.
@@ -641,6 +774,12 @@ int is_console_locked(void)
 }
 EXPORT_SYMBOL(is_console_locked);
 
+void wake_up_klogd(void)
+{
+	if (!oops_in_progress && waitqueue_active(&log_wait))
+		wake_up_interruptible(&log_wait);
+}
+
 /**
  * release_console_sem - unlock the console system
  *
@@ -655,29 +794,39 @@ EXPORT_SYMBOL(is_console_locked);
  *
  * release_console_sem() may be called from any context.
  */
-void release_console_sem(void)
+static unsigned long do_release_console_sem(unsigned long *flags)
 {
-	unsigned long flags;
 	unsigned long _con_start, _log_end;
-	unsigned long wake_klogd = 0;
+	unsigned long wake_klogd;
 
+	wake_klogd = 0;
 	for ( ; ; ) {
-		spin_lock_irqsave(&logbuf_lock, flags);
 		wake_klogd |= log_start - log_end;
 		if (con_start == log_end)
 			break;			/* Nothing to print */
 		_con_start = con_start;
 		_log_end = log_end;
 		con_start = log_end;		/* Flush */
-		spin_unlock_irqrestore(&logbuf_lock, flags);
+		spin_unlock_irqrestore(&logbuf_lock, *flags);
 		call_console_drivers(_con_start, _log_end);
+		spin_lock_irqsave(&logbuf_lock, *flags);
 	}
+	return wake_klogd;
+}
+
+void release_console_sem(void)
+{
+	unsigned long wake_klogd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&logbuf_lock, flags);
+	wake_klogd = do_release_console_sem(&flags);
 	console_locked = 0;
 	console_may_schedule = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
-	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
-		wake_up_interruptible(&log_wait);
+	if (wake_klogd)
+		wake_up_klogd();
 }
 EXPORT_SYMBOL(release_console_sem);
 
@@ -936,3 +1085,33 @@ int printk_ratelimit(void)
 				printk_ratelimit_burst);
 }
 EXPORT_SYMBOL(printk_ratelimit);
+
+/*
+ *	Token-bucket rate limiting helper (vz_ratelimit).
+ */
+int vz_ratelimit(struct vz_rate_info *p)
+{
+	unsigned long cjif, djif;
+	unsigned long flags;
+	static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
+	long new_bucket;
+
+	spin_lock_irqsave(&ratelimit_lock, flags);
+	cjif = jiffies;
+	djif = cjif - p->last;
+	if (djif < p->interval) {
+		if (p->bucket >= p->burst) {
+			spin_unlock_irqrestore(&ratelimit_lock, flags);
+			return 0;
+		}
+		p->bucket++;
+	} else {
+		new_bucket = p->bucket - (djif / (unsigned)p->interval);
+		if (new_bucket < 0)
+			new_bucket = 0;
+		p->bucket = new_bucket + 1;
+	}
+	p->last = cjif;
+	spin_unlock_irqrestore(&ratelimit_lock, flags);
+	return 1;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/ptrace.c linux-2.6.9-ve023stab054/kernel/ptrace.c
--- linux-2.6.9-100.orig/kernel/ptrace.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/ptrace.c	2011-06-15 19:26:21.000000000 +0400
@@ -130,7 +130,7 @@ int ptrace_attach(struct task_struct *ta
 	retval = -EPERM;
 	if (task->pid <= 1)
 		goto out;
-	if (task == current)
+	if (task->tgid == current->tgid)
 		goto out;
 
 repeat:
@@ -166,6 +166,10 @@ repeat:
 	rmb();
 	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
 		goto bad;
+	if (!task->mm->vps_dumpable && !ve_is_super(get_exec_env()))
+		goto bad;
+	if (task->mm->vps_dumpable == 2)
+		goto bad;
 	/* the same process cannot be attached many times */
 	if (task->ptrace & PT_PTRACED)
 		goto bad;
@@ -277,6 +281,7 @@ int access_process_vm(struct task_struct
 	
 	return buf - old_buf;
 }
+EXPORT_SYMBOL(access_process_vm);
 
 int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
 {
diff -Nurap linux-2.6.9-100.orig/kernel/sched.c linux-2.6.9-ve023stab054/kernel/sched.c
--- linux-2.6.9-100.orig/kernel/sched.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/sched.c	2011-06-15 19:26:22.000000000 +0400
@@ -45,6 +45,8 @@
 #include <linux/seq_file.h>
 #include <linux/times.h>
 #include <linux/kprobes.h>
+#include <linux/vsched.h>
+#include <linux/fairsched.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -140,7 +142,7 @@
 #ifdef CONFIG_SMP
 #define TIMESLICE_GRANULARITY(p)	(MIN_TIMESLICE * \
 		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
-			num_online_cpus())
+			vsched_num_online_vcpus(task_vsched(p)))
 #else
 #define TIMESLICE_GRANULARITY(p)	(MIN_TIMESLICE * \
 		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
@@ -221,6 +223,7 @@ struct prio_array {
  * (such as the load balancing or the thread migration code), lock
  * acquire operations must be ordered by ascending &runqueue.
  */
+typedef struct vcpu_info *vcpu_t;
 struct runqueue {
 	spinlock_t lock;
 
@@ -247,7 +250,7 @@ struct runqueue {
 
 	unsigned long switch_timestamp;
 	unsigned long long timestamp_last_tick;
-	task_t *curr, *idle;
+	task_t *curr;
 	struct mm_struct *prev_mm;
 	prio_array_t *active, *expired, arrays[2];
 	int best_expired_prio;
@@ -258,12 +261,11 @@ struct runqueue {
 
 	/* For active balancing */
 	int active_balance;
-	int push_cpu;
+#endif
+	vcpu_t push_cpu;
 
 	task_t *migration_thread;
 	struct list_head migration_queue;
-	int cpu;
-#endif
 
 #ifdef CONFIG_SCHEDSTATS
 	/* latency stats */
@@ -308,7 +310,52 @@ struct runqueue {
 #endif
 };
 
-static DEFINE_PER_CPU(struct runqueue, runqueues);
+/* VCPU scheduler state description */
+struct vcpu_info;
+struct vcpu_scheduler {
+	struct list_head idle_list;
+	struct list_head active_list;
+	struct list_head running_list;
+#ifdef CONFIG_FAIRSCHED
+	struct fairsched_node *node;
+#endif
+	struct vcpu_info *vcpu[NR_CPUS];
+	int id;
+	cpumask_t vcpu_online_map, vcpu_running_map;
+	cpumask_t pcpu_running_map;
+	int num_online_vcpus;
+} ____cacheline_maxaligned_in_smp;
+
+/* virtual CPU description */
+struct vcpu_info {
+	struct runqueue rq;
+#ifdef CONFIG_SCHED_VCPU
+	unsigned active : 1,
+		 running : 1;
+	struct list_head list;
+	struct vcpu_scheduler *vsched;
+	int last_pcpu;
+	unsigned long start_time;
+	unsigned long stop_time;
+#endif
+	int id;
+} ____cacheline_maxaligned_in_smp;
+
+/* physical CPU description */
+struct pcpu_info {
+	struct vcpu_scheduler *vsched;
+	struct vcpu_info *vcpu;
+	task_t *idle;
+#ifdef CONFIG_SMP
+	struct sched_domain *sd;
+#endif
+	int id;
+} ____cacheline_maxaligned_in_smp;
+
+struct pcpu_info pcpu_info[NR_CPUS];
+
+#define pcpu(nr)		(&pcpu_info[nr])
+#define this_pcpu()		(pcpu(smp_processor_id()))
 
 /*
  * sched-domains (multiprocessor balancing) declarations:
@@ -452,22 +499,724 @@ struct sched_domain {
 
 
 #define for_each_domain(cpu, domain) \
-	for (domain = cpu_rq(cpu)->sd; domain; domain = domain->parent)
+	for (domain = vcpu_rq(cpu)->sd; domain; domain = domain->parent)
+
+#ifdef CONFIG_SCHED_VCPU
+
+/* Used in find_idle_vsched() */
+static DEFINE_PER_CPU(int, find_busvs_last_pcpu);
+
+u32 vcpu_sched_timeslice = 5;
+u32 vcpu_timeslice = 0;
+u32 vcpu_hot_timeslice = 4;	/* < 4 won't work for HZ=250 */
+EXPORT_SYMBOL(vcpu_sched_timeslice);
+EXPORT_SYMBOL(vcpu_timeslice);
+EXPORT_SYMBOL(vcpu_hot_timeslice);
+
+extern spinlock_t fairsched_lock;
+static struct vcpu_scheduler default_vsched, idle_vsched;
+static struct vcpu_info boot_vcpu, boot_idle_vcpu;
+
+#define vsched_default_vsched()	(&default_vsched)
+#define vsched_default_vcpu(id)	(default_vsched.vcpu[id])
+
+/* 
+ * All macros below can be used without locks, if there are no
+ * strict ordering requirements, because we assume that:
+ *
+ * 1. VCPU could not disappear "on the fly" (FIXME)
+ *
+ * 2. p->vsched access is atomic.
+ */
+
+#define task_vsched(tsk)	((tsk)->vsched)
+#define this_vsched()		(task_vsched(current))
+
+#define vsched_vcpu(vsched, id)	((vsched)->vcpu[id])
+#define this_vcpu()		(task_vcpu(current))
+#define task_vcpu(p)		((p)->vcpu)
+
+#define vsched_id(vsched)	((vsched)->id)
+#define vsched_vcpu_online_map(vsched)	((vsched)->vcpu_online_map)
+#define vsched_num_online_vcpus(vsched)	((vsched)->num_online_vcpus)
+#define vsched_pcpu_running_map(vsched)	((vsched)->pcpu_running_map)
+
+#define vcpu_vsched(vcpu)	((vcpu)->vsched)
+#define vcpu_last_pcpu(vcpu)	((vcpu)->last_pcpu)
+#define vcpu_isset(vcpu, mask)	(cpu_isset((vcpu)->id, mask))
+#define vcpu_is_offline(vcpu)	(!vcpu_isset(vcpu, \
+					vcpu_vsched(vcpu)->vcpu_online_map))
+
+static int __add_vcpu(struct vcpu_scheduler *vsched, int id);
+
+#else	/* CONFIG_SCHED_VCPU */
+
+static DEFINE_PER_CPU(struct vcpu_info, vcpu_info);
+
+#define task_vsched(p)		NULL
+#define this_vcpu()		(task_vcpu(current))
+#define task_vcpu(p)		(vcpu(task_cpu(p)))
+
+#define vsched_vcpu(sched, id)	(vcpu(id))
+#define vsched_id(vsched)	0
+#define vsched_default_vsched()	NULL
+#define vsched_default_vcpu(id)	(vcpu(id))
+
+#define vsched_vcpu_online_map(vsched)	(cpu_online_map)
+#define vsched_num_online_vcpus(vsched)	(num_online_cpus())
+#define vsched_pcpu_running_map(vsched)	(cpu_online_map)
+
+#define vcpu(id)		(&per_cpu(vcpu_info, id))
+
+#define vcpu_vsched(vcpu)	NULL
+#define vcpu_last_pcpu(vcpu)	((vcpu)->id)
+#define vcpu_isset(vcpu, mask)	(cpu_isset((vcpu)->id, mask))
+#define vcpu_is_offline(vcpu)	(cpu_is_offline((vcpu)->id))
+
+#endif	/* CONFIG_SCHED_VCPU */
+
+#define this_rq()		(vcpu_rq(this_vcpu()))
+#define task_rq(p)		(vcpu_rq(task_vcpu(p)))
+#define vcpu_rq(vcpu)		(&(vcpu)->rq)
+#define get_vcpu()		({ preempt_disable(); this_vcpu(); })
+#define put_vcpu()		({ put_cpu(); })
+#define rq_vcpu(__rq)		(container_of((__rq), struct vcpu_info, rq))
+
+task_t *idle_task(int cpu) 
+{
+	return pcpu(cpu)->idle;
+}
+
+int wake_balance=1;
+
+#define LOWLAT_ENABLED()		(wake_balance == 2)
+
+#ifdef CONFIG_SMP
+static unsigned long current_load(runqueue_t *rq)
+{
+	unsigned long load = rq->nr_running * SCHED_LOAD_SCALE;
+
+	if (LOWLAT_ENABLED())
+		load += (SCHED_LOAD_SCALE * rq->irq_pct) / 100;
+	return load;
+}
+
+static inline void update_rq_cpu_load(runqueue_t *rq)
+{
+	unsigned long old_load, this_load;
+
+	if (rq->nr_running == 0) {
+		rq->cpu_load = 0;
+		return;
+	}
+
+	old_load = rq->cpu_load;
+	this_load = current_load(rq);
+	/*
+	 * Round up the averaging division if load is increasing. This
+	 * prevents us from getting stuck on 9 if the load is 10, for
+	 * example.
+	 */
+	if (this_load > old_load)
+		old_load++;
+	rq->cpu_load = (old_load + this_load) / 2;
+}
+#else	/* CONFIG_SMP */
+static inline void update_rq_cpu_load(runqueue_t *rq)
+{
+}
+#endif	/* CONFIG_SMP */
+
+#ifdef CONFIG_SCHED_VCPU
+
+void fastcall vsched_cpu_online_map(struct vcpu_scheduler *vsched,
+		cpumask_t *mask)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&fairsched_lock, flags);
+	*mask = vsched->vcpu_online_map;
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+}
+
+static inline void set_task_vsched(task_t *p, struct vcpu_scheduler *vsched)
+{
+	/* NOTE: set_task_cpu() is required after every set_task_vsched()! */
+	p->vsched = vsched;
+	p->vsched_id = vsched_id(vsched);
+}
+
+inline void set_task_cpu(struct task_struct *p, unsigned int vcpu_id)
+{
+	p->vcpu = vsched_vcpu(task_vsched(p), vcpu_id);
+	p->vcpu_id = vcpu_id;
+}
+
+static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu)
+{
+	p->vcpu = vcpu;
+	p->vcpu_id = vcpu->id;
+}
+
+/* this is called when rq->nr_running changes from 0 to 1 */
+static void vcpu_attach(runqueue_t *rq)
+{
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+
+	vcpu = rq_vcpu(rq);
+	vsched = vcpu_vsched(vcpu);
+
+	BUG_ON(vcpu->active);
+	spin_lock(&fairsched_lock);
+	vcpu->active = 1;
+	if (!vcpu->running)
+		list_move_tail(&vcpu->list, &vsched->active_list);
+
+	fairsched_incrun(vsched->node);
+	spin_unlock(&fairsched_lock);
+}
+
+/* this is called when rq->nr_running changes from 1 to 0 */
+static void vcpu_detach(runqueue_t *rq)
+{
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+
+	vcpu = rq_vcpu(rq);
+	vsched = vcpu_vsched(vcpu);
+	BUG_ON(!vcpu->active);
+
+	spin_lock(&fairsched_lock);
+	fairsched_decrun(vsched->node);
+
+	vcpu->active = 0;
+	if (!vcpu->running)
+		list_move_tail(&vcpu->list, &vsched->idle_list);
+	spin_unlock(&fairsched_lock);
+}
+
+static inline void __vcpu_get(vcpu_t vcpu)
+{
+	struct pcpu_info *pcpu;
+	struct vcpu_scheduler *vsched;
+
+	BUG_ON(!this_vcpu()->running);
+
+	pcpu = this_pcpu();
+	vsched = vcpu_vsched(vcpu);
+
+	pcpu->vcpu = vcpu;
+	pcpu->vsched = vsched;
+
+	fairsched_inccpu(vsched->node);
+
+	list_move_tail(&vcpu->list, &vsched->running_list);
+	vcpu->start_time = jiffies;
+	vcpu->last_pcpu = pcpu->id;
+	vcpu->running = 1;
+	__set_bit(vcpu->id, vsched->vcpu_running_map.bits);
+	__set_bit(pcpu->id, vsched->pcpu_running_map.bits);
+#ifdef CONFIG_SMP
+	vcpu_rq(vcpu)->sd = pcpu->sd;
+#endif
+}
+
+static void vcpu_put(vcpu_t vcpu)
+{
+	struct vcpu_scheduler *vsched;
+	struct pcpu_info *cur_pcpu;
+	runqueue_t *rq;
+
+	vsched = vcpu_vsched(vcpu);
+	rq = vcpu_rq(vcpu);
+	cur_pcpu = this_pcpu();
+
+	BUG_ON(!vcpu->running);
+
+	spin_lock(&fairsched_lock);
+	vcpu->running = 0;
+	list_move_tail(&vcpu->list,
+		vcpu->active ? &vsched->active_list : &vsched->idle_list);
+	fairsched_deccpu(vsched->node);
+	__clear_bit(vcpu->id, vsched->vcpu_running_map.bits);
+	if (vsched != this_vsched())
+		__clear_bit(cur_pcpu->id, vsched->pcpu_running_map.bits);
+
+	vcpu->stop_time = jiffies;
+	if (!vcpu->active)
+		rq->switch_timestamp = jiffies;
+	/* from this point task_running(prev_rq, prev) will be 0 */
+	rq->curr = cur_pcpu->idle;
+	update_rq_cpu_load(rq);
+	spin_unlock(&fairsched_lock);
+}
+
+/*
+ * Find an idle VCPU in the given vsched. A VCPU last run on this
+ * pcpu is preferred.
+ */
+static vcpu_t find_idle_vcpu(struct vcpu_scheduler *vsched)
+{
+	vcpu_t vcpu;
+	vcpu_t best_vcpu;
+	int this_pcpu = smp_processor_id();
+
+	best_vcpu = NULL;
+
+	spin_lock(&fairsched_lock);
+	if (!list_empty(&vsched->idle_list)) {
+		list_for_each_entry(vcpu, &vsched->idle_list, list) {
+			if (unlikely(vcpu_is_offline(vcpu)))
+				continue;
+			best_vcpu = vcpu;
+			if (vcpu->last_pcpu == this_pcpu)
+				break;
+		}
+	}
+	spin_unlock(&fairsched_lock);
+	return best_vcpu;
+}
+
+/*
+ * find_busiest_vsched - find busiest vsched among running vsched's.
+ * An active vsched will be balanced when it becomes running.
+ *
+ * This routine must be simple and fast.
+ */
+static inline struct vcpu_scheduler *find_busiest_vsched(void)
+{
+	vcpu_t vcpu;
+	int i, n;
+	cpumask_t mask, tmp_mask;
+	int step;
+
+	step = 0;
+
+	mask = cpu_online_map;
+
+	/*
+	 * We implement a simple round-robin strategy to pick the
+	 * PCPU id to start from. The last PCPU number is saved in
+	 * per_cpu(find_busvs_last_pcpu).
+	 *
+	 * Assume the mask is 0x6789abcd and it's time to start
+	 * from PCPU #13:
+	 * 
+	 * 1) In the first pass we must use mask 0x6789a000:
+	 *
+	 *    ((0x6789abcd >> 13) << 13) => 0x6789a000
+	 *
+	 * 2) In the second pass we must use mask 0x00000bcd:
+	 *
+	 *      0x6789abcd ^ 0x6789a000  => 0x00000bcd
+	 */
+	n = per_cpu(find_busvs_last_pcpu, smp_processor_id());
+
+	cpus_shift_right(tmp_mask, mask, n);
+	cpus_shift_left(tmp_mask, tmp_mask, n);
+restart:
+	for_each_cpu_mask(i, tmp_mask) {
+		vcpu = pcpu(i)->vcpu;
+		if (vcpu_is_offline(vcpu))
+			continue;
+		if (vcpu->vsched == &idle_vsched)
+			continue;
+		if (vcpu == this_vcpu())
+			continue;
 
-#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
-#define this_rq()		(&__get_cpu_var(runqueues))
-#define task_rq(p)		cpu_rq(task_cpu(p))
-#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
+		/*
+		 * 'Busiest' means there are at least 2 tasks on this vsched.
+		 */
+		if (vcpu->rq.nr_running > 1) {
+			per_cpu(find_busvs_last_pcpu, smp_processor_id())
+				= ++n % NR_CPUS;
+			return vcpu->vsched;
+		}
+	}
+	if (!step++) {
+		/* Second pass */
+		cpus_xor(tmp_mask, mask, tmp_mask);
+		goto restart;
+	}
+	return NULL;
+}
+
+/*
+ * Find idle VCPUs in a vsched, that can be balanced
+ */
+static inline vcpu_t find_idle_target(void)
+{
+	vcpu_t vcpu;
+	struct vcpu_scheduler *vsched;
+
+	/*
+	 * First of all we have to find busiest vsched
+	 */
+	vsched = find_busiest_vsched();
+	if (vsched == NULL)
+		return NULL;
+
+	/*
+	 * Try to find an idle VCPU in the target vsched.
+	 * VCPU that was last running on this PCPU is preferred.
+	 */
+	vcpu = find_idle_vcpu(vsched);
+	if (!vcpu)
+		return NULL;
+	return vcpu;
+}
+
+static vcpu_t schedule_vcpu(vcpu_t cur_vcpu, cycles_t cycles)
+{
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu, best_vcpu;
+	unsigned long time;
+	runqueue_t *rq;
+#ifdef CONFIG_FAIRSCHED
+	struct fairsched_node *node, *nodec;
+
+	nodec = vcpu_vsched(cur_vcpu)->node;
+	node = nodec;
+#endif
+
+	BUG_ON(!cur_vcpu->running);
+restart:
+	spin_lock(&fairsched_lock);
+#ifdef CONFIG_FAIRSCHED
+	node = fairsched_schedule(node, nodec,
+			cur_vcpu->active,
+			cycles);
+	if (unlikely(node == NULL))
+		goto idle;
+
+	vsched = node->vsched;
+#else
+	vsched = &default_vsched;
+#endif
+	/* FIXME: optimize vcpu switching, maybe we do not need to call
+	   fairsched_schedule() at all if vcpu is still active and too
+	   little time has passed so far */
+	if (cur_vcpu->vsched == vsched && cur_vcpu->active &&
+	    jiffies - cur_vcpu->start_time < msecs_to_jiffies(vcpu_sched_timeslice)) {
+		vcpu = cur_vcpu;
+		goto done;
+	}
+
+	if (list_empty(&vsched->active_list)) {
+		/* nothing except for this cpu can be scheduled */
+		if (likely(cur_vcpu->vsched == vsched && cur_vcpu->active)) {
+			/* 
+			 * Current vcpu is the one we need. We have not
+			 * put it yet, so it's not on the active_list.
+			 */
+			vcpu = cur_vcpu;
+			vcpu->start_time = jiffies;
+			goto done;
+		} else
+			goto none;
+	}
+
+	/*
+	 * Ok, we are going to choose new VCPU now.
+	 */
+	time = jiffies - msecs_to_jiffies(vcpu_hot_timeslice);
+	/*
+	 * The first vcpu in the list is preferred, because it has waited
+	 * for CPU longer than others. If all vcpu's are hot, use the oldest
+	 * one.
+	 */
+	best_vcpu = list_entry(vsched->active_list.next,
+						struct vcpu_info, list);
+	list_for_each_entry(vcpu, &vsched->active_list, list) {
+		/* Skip hot VCPU's that were running on another CPU's */
+		if (vcpu->stop_time > time && 
+				vcpu_last_pcpu(vcpu) != smp_processor_id())
+			continue;
+
+		best_vcpu = vcpu;
+		break;
+	}
+	vcpu = best_vcpu;
+
+	/* add it to running list */
+	__vcpu_get(vcpu);
+done:
+	spin_unlock(&fairsched_lock);
+
+	rq = vcpu_rq(vcpu);
+	if (unlikely(vcpu != cur_vcpu)) {
+		spin_unlock(&vcpu_rq(cur_vcpu)->lock);
+		spin_lock(&rq->lock);
+		if (unlikely(!rq->nr_running)) {
+			/* race with balancing? */
+			spin_unlock(&rq->lock);
+			vcpu_put(vcpu);
+			spin_lock(&vcpu_rq(cur_vcpu)->lock);
+			goto restart;
+		}
+	}
+	BUG_ON(!rq->nr_running);
+	return vcpu;
+
+none:
+#ifdef CONFIG_FAIRSCHED
+	spin_unlock(&fairsched_lock);
+
+	/* fairsched doesn't schedule more CPUs than we have active */
+	BUG_ON(1);
+#else
+	goto idle;
+#endif
+
+idle:
+	vcpu = task_vcpu(this_pcpu()->idle);
+	__vcpu_get(vcpu);
+	spin_unlock(&fairsched_lock);
+	spin_unlock(&vcpu_rq(cur_vcpu)->lock);
+
+	spin_lock(&vcpu_rq(vcpu)->lock);
+	return vcpu;
+}
+
+#else /* CONFIG_SCHED_VCPU */
+
+#define set_task_vsched(task, vsched)		do { } while (0)
+
+static inline void vcpu_attach(runqueue_t *rq)
+{
+}
+
+static inline void vcpu_detach(runqueue_t *rq)
+{
+}
+
+static inline void vcpu_put(vcpu_t vcpu)
+{
+}
+
+static inline vcpu_t schedule_vcpu(vcpu_t prev_vcpu, cycles_t cycles)
+{
+	return prev_vcpu;
+}
+
+static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu)
+{
+	set_task_pcpu(p, vcpu->id);
+}
+
+#endif /* CONFIG_SCHED_VCPU */
+
+int vcpu_online(int cpu)
+{
+	return cpu_isset(cpu, vsched_vcpu_online_map(this_vsched()));
+}
 
 /*
  * Default context-switch locking:
  */
 #ifndef prepare_arch_switch
 # define prepare_arch_switch(rq, next)	do { } while (0)
-# define finish_arch_switch(rq, next)	spin_unlock_irq(&(rq)->lock)
+# define finish_arch_switch(rq, next)	spin_unlock(&(rq)->lock)
 # define task_running(rq, p)		((rq)->curr == (p))
 #endif
 
+struct kernel_stat_glob kstat_glob;
+spinlock_t kstat_glb_lock = SPIN_LOCK_UNLOCKED;
+EXPORT_SYMBOL(kstat_glob);
+EXPORT_SYMBOL(kstat_glb_lock);
+
+#ifdef CONFIG_VE
+
+#define ve_nr_running_inc(env, cpu)					\
+	do {								\
+		VE_CPU_STATS((env), (cpu))->nr_running++;		\
+	} while(0)
+#define ve_nr_running_dec(env, cpu)					\
+	do {								\
+		VE_CPU_STATS((env), (cpu))->nr_running--;		\
+	} while(0)
+#define ve_nr_iowait_inc(env, cpu) 					\
+	do {								\
+		VE_CPU_STATS((env), (cpu))->nr_iowait++;		\
+	} while(0)
+#define ve_nr_iowait_dec(env, cpu)					\
+	do {								\
+		VE_CPU_STATS((env), (cpu))->nr_iowait--;		\
+	} while(0)
+#define ve_nr_unint_inc(env, cpu)					\
+	do {								\
+		VE_CPU_STATS((env), (cpu))->nr_unint++;			\
+	} while(0)
+#define ve_nr_unint_dec(env, cpu)					\
+	do {								\
+		VE_CPU_STATS((env), (cpu))->nr_unint--;			\
+	} while(0)
+
+void ve_sched_attach(struct ve_struct *envid)
+{
+	struct task_struct *tsk;
+	unsigned int vcpu;
+
+	tsk = current;
+	preempt_disable();
+	vcpu = task_cpu(tsk);
+	ve_nr_running_dec(VE_TASK_INFO(tsk)->owner_env, vcpu);
+	ve_nr_running_inc(envid, vcpu);
+	preempt_enable();
+}
+EXPORT_SYMBOL(ve_sched_attach);
+
+#else
+
+#define ve_nr_running_inc(env, cpu)		do { } while(0)
+#define ve_nr_running_dec(env, cpu)		do { } while(0)
+#define ve_nr_iowait_inc(env, cpu)		do { } while(0)
+#define ve_nr_iowait_dec(env, cpu)		do { } while(0)
+#define ve_nr_unint_inc(env, cpu)		do { } while(0)
+#define ve_nr_unint_dec(env, cpu)		do { } while(0)
+
+#endif
+
+struct task_nrs_struct {
+	long nr_running;
+	long nr_uninterruptible;
+	long nr_stopped;
+	long nr_sleeping;
+	atomic_t nr_iowait;
+	long long nr_switches;
+} ____cacheline_aligned_in_smp;
+
+static struct task_nrs_struct glob_tasks_nrs[NR_CPUS];
+unsigned long nr_zombie = 0;	/* protected by tasklist_lock */
+EXPORT_SYMBOL(nr_zombie);
+
+atomic_t nr_dead = ATOMIC_INIT(0);
+EXPORT_SYMBOL(nr_dead);
+
+#define nr_running_inc(cpu, vcpu, ve) do {			\
+		glob_tasks_nrs[cpu].nr_running++;		\
+		ve_nr_running_inc(ve, vcpu);			\
+	} while (0)
+#define nr_running_dec(cpu, vcpu, ve) do {			\
+		glob_tasks_nrs[cpu].nr_running--;		\
+		ve_nr_running_dec(ve, vcpu);			\
+	} while (0)
+
+#define nr_unint_inc(cpu, vcpu, ve) do {			\
+		glob_tasks_nrs[cpu].nr_uninterruptible++;	\
+		ve_nr_unint_inc(ve, vcpu);			\
+	} while (0)
+#define nr_unint_dec(cpu, vcpu, ve) do {			\
+		glob_tasks_nrs[cpu].nr_uninterruptible--;	\
+		ve_nr_unint_dec(ve, vcpu);			\
+	} while (0)
+
+#define nr_iowait_inc(cpu) do {					\
+		atomic_inc(&glob_tasks_nrs[cpu].nr_iowait);	\
+	} while (0)
+#define nr_iowait_dec(cpu) do {					\
+		atomic_dec(&glob_tasks_nrs[cpu].nr_iowait);	\
+	} while (0)
+
+#define nr_stopped_inc(cpu, vcpu, ve) do {			\
+		glob_tasks_nrs[cpu].nr_stopped++;		\
+	} while (0)
+#define nr_stopped_dec(cpu, vcpu, ve) do {			\
+		glob_tasks_nrs[cpu].nr_stopped--;		\
+	} while (0)
+
+#define nr_sleeping_inc(cpu, vcpu, ve) do {			\
+		glob_tasks_nrs[cpu].nr_sleeping++;		\
+	} while (0)
+#define nr_sleeping_dec(cpu, vcpu, ve) do {			\
+		glob_tasks_nrs[cpu].nr_sleeping--;		\
+	} while (0)
+ 
+#if defined(CONFIG_VE) && defined(CONFIG_SCHED_VCPU)
+#define cycles_after(a, b)	((long long)(b) - (long long)(a) < 0)
+
+cycles_t ve_sched_get_idle_time(struct ve_struct *ve, int cpu)
+{
+	struct ve_cpu_stats *ve_stat;
+	unsigned v;
+	cycles_t strt, ret, cycles;
+
+	ve_stat = VE_CPU_STATS(ve, cpu);
+	do {
+		v = read_seqcount_begin(&ve_stat->stat_lock);
+		ret = ve_stat->idle_time;
+		strt = ve_stat->strt_idle_time;
+		if (strt && nr_uninterruptible_ve(ve) == 0) {
+			cycles = get_cycles();
+			if (cycles_after(cycles, strt))
+				ret += cycles - strt;
+		}
+	} while (read_seqcount_retry(&ve_stat->stat_lock, v));
+	return ret;
+}
+
+cycles_t ve_sched_get_iowait_time(struct ve_struct *ve, int vcpu)
+{
+	struct ve_cpu_stats *ve_stat;
+	unsigned v;
+	cycles_t strt, ret, cycles;
+
+	ve_stat = VE_CPU_STATS(ve, vcpu);
+	do {
+		struct runqueue *rq;
+		rq = vcpu_rq(vsched_vcpu(this_vsched(), vcpu));
+		v = read_seqcount_begin(&ve_stat->stat_lock);
+		ret = ve_stat->iowait_time;
+		strt = ve_stat->strt_idle_time;
+		if (strt && atomic_read(&rq->nr_iowait) > 0) {
+			cycles = get_cycles();
+			if (cycles_after(cycles, strt))
+				ret += cycles - strt;
+		}
+	} while (read_seqcount_retry(&ve_stat->stat_lock, v));
+	return ret;
+}
+
+static inline void vcpu_save_ve_idle(struct ve_struct *ve,
+		vcpu_t vcpu, cycles_t cycles)
+{
+	struct ve_cpu_stats *ve_stat;
+
+	ve_stat = VE_CPU_STATS(ve, vcpu->id);
+
+	write_seqcount_begin(&ve_stat->stat_lock);
+	if (ve_stat->strt_idle_time) {
+		if (cycles_after(cycles, ve_stat->strt_idle_time)) {
+			if (atomic_read(&vcpu_rq(vcpu)->nr_iowait) == 0)
+				ve_stat->idle_time += cycles -
+					ve_stat->strt_idle_time;
+			else
+				ve_stat->iowait_time += cycles - 
+					ve_stat->strt_idle_time;
+		}
+		ve_stat->strt_idle_time = 0;
+	}
+	write_seqcount_end(&ve_stat->stat_lock);
+}
+
+static inline void vcpu_strt_ve_idle(struct ve_struct *ve,
+		unsigned int vcpu, cycles_t cycles)
+{
+	struct ve_cpu_stats *ve_stat;
+
+	ve_stat = VE_CPU_STATS(ve, vcpu);
+
+	write_seqcount_begin(&ve_stat->stat_lock);
+	ve_stat->strt_idle_time = cycles;
+	write_seqcount_end(&ve_stat->stat_lock);
+}
+
+#else
+#define vcpu_save_ve_idle(ve, vcpu, cycles)	do { } while (0)
+#define vcpu_strt_ve_idle(ve, vcpu, cycles)	do { } while (0)
+#endif
+
 /*
  * task_rq_lock - lock the runqueue a given task resides on and disable
  * interrupts.  Note the ordering: we can safely lookup the task_rq without
@@ -780,13 +1529,39 @@ static int effective_prio(task_t *p)
 	return prio;
 }
 
+static inline void write_wakeup_stamp(struct task_struct *p, cycles_t cyc)
+{
+	struct ve_task_info *ti;
+
+	ti = VE_TASK_INFO(p);
+	write_seqcount_begin(&ti->wakeup_lock);
+	ti->wakeup_stamp = cyc;
+	write_seqcount_end(&ti->wakeup_lock);
+}
+
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
+	cycles_t cycles;
+	unsigned int vcpu;
+	struct ve_struct *ve;
+
+	cycles = get_cycles();
+	vcpu = task_cpu(p);
+	ve = VE_TASK_INFO(p)->owner_env;
+
+	write_wakeup_stamp(p, cycles);
+	VE_TASK_INFO(p)->sleep_time += cycles;
+	nr_running_inc(smp_processor_id(), vcpu, ve);
+
 	enqueue_task(p, rq->active);
 	rq->nr_running++;
+	if (rq->nr_running == 1) {
+		vcpu_save_ve_idle(ve, task_vcpu(p), cycles);
+		vcpu_attach(rq);
+	}
 }
 
 /*
@@ -926,9 +1701,32 @@ static void activate_task(task_t *p, run
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
+	cycles_t cycles;
+	unsigned int cpu, vcpu;
+	struct ve_struct *ve;
+
+	cycles = get_cycles();
+	cpu = smp_processor_id();
+	vcpu = task_cpu(p);
+	ve = VE_TASK_INFO(p)->owner_env;
+
+	VE_TASK_INFO(p)->sleep_time -= cycles;
 	rq->nr_running--;
+	nr_running_dec(cpu, vcpu, ve);
+	if (p->state == TASK_UNINTERRUPTIBLE) {
+		nr_unint_inc(cpu, vcpu, ve);
+	}
+	if (p->state == TASK_INTERRUPTIBLE)
+		nr_sleeping_inc(cpu, vcpu, ve);
+	if (p->state == TASK_STOPPED)
+		nr_stopped_inc(cpu, vcpu, ve);
+	/* nr_zombie is calculated in exit.c */
 	dequeue_task(p, p->array);
 	p->array = NULL;
+	if (rq->nr_running == 0) {
+		vcpu_strt_ve_idle(ve, vcpu, cycles);
+		vcpu_detach(rq);
+	}
 }
 
 /*
@@ -939,19 +1737,24 @@ static void deactivate_task(struct task_
  * the target CPU.
  */
 #ifdef CONFIG_SMP
+/* FIXME: need to add vsched arg */
 static void resched_task(task_t *p)
 {
 	int need_resched, nrpolling;
 
+#if 0
+	/* FIXME: this fails due to idle rq->curre == idle */
 	BUG_ON(!spin_is_locked(&task_rq(p)->lock));
+#endif
 
 	/* minimise the chance of sending an interrupt to poll_idle() */
 	nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
 	need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
 	nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
 
-	if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id()))
-		smp_send_reschedule(task_cpu(p));
+	/* FIXME: think over */
+	if (!need_resched && !nrpolling && (task_pcpu(p) != smp_processor_id()))
+		smp_send_reschedule(task_pcpu(p));
 }
 #else
 static inline void resched_task(task_t *p)
@@ -966,14 +1769,30 @@ static inline void resched_task(task_t *
  */
 inline int task_curr(const task_t *p)
 {
-	return cpu_curr(task_cpu(p)) == p;
+	return task_rq(p)->curr == p;
 }
 
-int wake_balance=1;
+EXPORT_SYMBOL(task_curr);
 
-#ifdef CONFIG_SMP
+/**
+ * idle_cpu - is a given cpu idle currently?
+ * @cpu: the processor in question.
+ */
+inline int idle_cpu(int cpu)
+{
+	return pcpu(cpu)->vsched == &idle_vsched;
+}
 
-#define LOWLAT_ENABLED()		(wake_balance == 2)
+EXPORT_SYMBOL_GPL(idle_cpu);
+
+static inline int idle_vcpu(vcpu_t cpu)
+{
+#ifdef CONFIG_SCHED_VCPU
+	return !cpu->active;
+#else
+	return idle_cpu(cpu->id);
+#endif
+}
 
 enum request_type {
 	REQ_MOVE_TASK,
@@ -986,7 +1805,7 @@ typedef struct {
 
 	/* For REQ_MOVE_TASK */
 	task_t *task;
-	int dest_cpu;
+	vcpu_t dest_cpu;
 
 	/* For REQ_SET_DOMAIN */
 	struct sched_domain *sd;
@@ -998,7 +1817,7 @@ typedef struct {
  * The task's runqueue lock must be held.
  * Returns true if you have to wait for migration thread.
  */
-static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
+static int migrate_task(task_t *p, vcpu_t dest_cpu, migration_req_t *req)
 {
 	runqueue_t *rq = task_rq(p);
 
@@ -1006,8 +1825,13 @@ static int migrate_task(task_t *p, int d
 	 * If the task is not on a runqueue (and not running), then
 	 * it is sufficient to simply update the task's cpu field.
 	 */
+#ifdef CONFIG_SCHED_VCPU
+	BUG_ON(task_vsched(p) == &idle_vsched);
+	BUG_ON(vcpu_vsched(dest_cpu) == &idle_vsched);
+#endif
 	if (!p->array && !task_running(rq, p)) {
-		set_task_cpu(p, dest_cpu);
+		set_task_vsched(p, vcpu_vsched(dest_cpu));
+		set_task_vcpu(p, dest_cpu);
 		return 0;
 	}
 
@@ -1019,6 +1843,7 @@ static int migrate_task(task_t *p, int d
 	return 1;
 }
 
+#ifdef CONFIG_SMP
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -1048,6 +1873,7 @@ repeat:
 	}
 	task_rq_unlock(rq, &flags);
 }
+EXPORT_SYMBOL(wait_task_inactive);
 
 /***
  * kick_process - kick a running thread to enter/exit the kernel
@@ -1061,30 +1887,24 @@ void kick_process(task_t *p)
 	int cpu;
 
 	preempt_disable();
-	cpu = task_cpu(p);
+	cpu = task_pcpu(p);
 	if ((cpu != smp_processor_id()) && task_curr(p))
+		/* FIXME: ??? think over */
+		/* should add something like get_pcpu(cpu)->vcpu->id == task_cpu(p),
+		   but with serialization of vcpu access... */
 		smp_send_reschedule(cpu);
 	preempt_enable();
 }
 
-static unsigned long current_load(runqueue_t *rq)
-{
-	unsigned long load = rq->nr_running * SCHED_LOAD_SCALE;
-
-	if (LOWLAT_ENABLED())
-		load += (SCHED_LOAD_SCALE * rq->irq_pct) / 100;
-	return load;
-}
-
 /*
  * Return a low guess at the load of a migration-source cpu.
  *
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static inline unsigned long source_load(int cpu)
+static inline unsigned long source_load(vcpu_t cpu)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	runqueue_t *rq = vcpu_rq(cpu);
 
 	return min(rq->cpu_load, current_load(rq));
 }
@@ -1092,9 +1912,9 @@ static inline unsigned long source_load(
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long target_load(int cpu)
+static inline unsigned long target_load(vcpu_t cpu)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	runqueue_t *rq = vcpu_rq(cpu);
 
 	return max(rq->cpu_load, current_load(rq));
 }
@@ -1109,20 +1929,21 @@ static inline unsigned long target_load(
  * Returns the CPU we should wake onto.
  */
 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-static int wake_idle(int cpu, task_t *p)
+static vcpu_t wake_idle(vcpu_t cpu, task_t *p)
 {
-	cpumask_t tmp;
-	runqueue_t *rq = cpu_rq(cpu);
+	cpumask_t tmp, vtmp;
+	runqueue_t *rq = vcpu_rq(cpu);
 	struct sched_domain *sd;
+	struct vcpu_scheduler *vsched;
 	int i;
 	unsigned long load = current_load(rq) + SCHED_LOAD_SCALE;
 
 	if (LOWLAT_ENABLED()) {
-		if (idle_cpu(cpu))
+		if (idle_vcpu(cpu))
 			load -= SCHED_LOAD_SCALE;
 		if (load <= SCHED_LOAD_SCALE)
 			return cpu;
-	} else if (idle_cpu(cpu)) {
+	} else if (idle_vcpu(cpu)) {
 		return cpu;
 	}
 
@@ -1130,30 +1951,37 @@ static int wake_idle(int cpu, task_t *p)
 	if (!(sd->flags & SD_WAKE_IDLE) && !LOWLAT_ENABLED())
 		return cpu;
 
-	cpus_and(tmp, sd->span, p->cpus_allowed);
+	vsched = vcpu_vsched(cpu);
+	cpus_and(tmp, sd->span, cpu_online_map);
+	cpus_and(vtmp, vsched_vcpu_online_map(vsched), p->cpus_allowed);
+
+	for_each_cpu_mask(i, vtmp) {
+		vcpu_t vcpu;
+		vcpu = vsched_vcpu(vsched, i);
+		if (!cpu_isset(vcpu_last_pcpu(vcpu), tmp))
+			continue;
 
-	for_each_cpu_mask(i, tmp) {
 		if (LOWLAT_ENABLED()) {
-			unsigned long l = current_load(cpu_rq(i))
+			unsigned long l = current_load(vcpu_rq(vcpu))
 							+ SCHED_LOAD_SCALE;
 
-			if (idle_cpu(i))
+			if (idle_vcpu(vcpu))
 				l -= SCHED_LOAD_SCALE;
 			if (l <= SCHED_LOAD_SCALE)
-				return i;
+				return vcpu;
 			if (l < load) {
-				cpu = i;
+				cpu = vcpu;
 				load = l;
 			}
-		} else if (idle_cpu(i)) {
-			return i;
+		} else if (idle_vcpu(vcpu)) {
+			return vcpu;
 		}
 	}
 
 	return cpu;
 }
 #else
-static inline int wake_idle(int cpu, task_t *p)
+static inline vcpu_t wake_idle(vcpu_t cpu, task_t *p)
 {
 	return cpu;
 }
@@ -1175,15 +2003,17 @@ static inline int wake_idle(int cpu, tas
  */
 static int try_to_wake_up(task_t * p, unsigned int state, int sync)
 {
-	int cpu, this_cpu, success = 0;
+	vcpu_t cpu, this_cpu;
+	int success = 0;
 	unsigned long flags;
 	long old_state;
 	runqueue_t *rq;
 #ifdef CONFIG_SMP
 	unsigned long load, this_load;
 	struct sched_domain *sd;
-	int new_cpu;
+	vcpu_t new_cpu;
 #endif
+	cpu = NULL;
 
 	rq = task_rq_lock(p, &flags);
 	schedstat_inc(rq, ttwu_cnt);
@@ -1194,8 +2024,8 @@ static int try_to_wake_up(task_t * p, un
 	if (p->array)
 		goto out_running;
 
-	cpu = task_cpu(p);
-	this_cpu = smp_processor_id();
+	cpu = task_vcpu(p);
+	this_cpu = this_vcpu();
 
 #ifdef CONFIG_SMP
 	if (unlikely(task_running(rq, p)))
@@ -1206,7 +2036,12 @@ static int try_to_wake_up(task_t * p, un
 	if (!wake_balance)
 		goto out_set_cpu;
 
-	if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
+	/* FIXME: add vsched->last_vcpu array to optimize wakeups in different vsched */
+	if (vcpu_vsched(cpu) != vcpu_vsched(this_cpu))
+		goto out_set_cpu;
+	if (cpu == this_cpu || unlikely(!vcpu_isset(this_cpu, p->cpus_allowed)))
+		goto out_set_cpu;
+	if (vcpu_is_offline(this_cpu))
 		goto out_set_cpu;
 
 	load = source_load(cpu);
@@ -1243,7 +2078,7 @@ static int try_to_wake_up(task_t * p, un
 			 * This domain has SD_WAKE_AFFINE and p is cache cold
 			 * in this domain.
 			 */
-			if (cpu_isset(cpu, sd->span)) {
+			if (cpu_isset(vcpu_last_pcpu(cpu), sd->span)) {
 				schedstat_inc(sd, ttwu_wake_affine);
 				goto out_set_cpu;
 			}
@@ -1253,7 +2088,7 @@ static int try_to_wake_up(task_t * p, un
 			 * This domain has SD_WAKE_BALANCE and there is
 			 * an imbalance.
 			 */
-			if (cpu_isset(cpu, sd->span)) {
+			if (cpu_isset(vcpu_last_pcpu(cpu), sd->span)) {
 				schedstat_inc(sd, ttwu_wake_balance);
 				goto out_set_cpu;
 			}
@@ -1264,9 +2099,9 @@ static int try_to_wake_up(task_t * p, un
 out_set_cpu:
 	schedstat_inc(rq, ttwu_attempts);
 	new_cpu = wake_idle(new_cpu, p);
-	if (new_cpu != cpu && cpu_isset(new_cpu, p->cpus_allowed)) {
+	if (new_cpu != cpu && vcpu_isset(new_cpu, p->cpus_allowed)) {
 		schedstat_inc(rq, ttwu_moved);
-		set_task_cpu(p, new_cpu);
+		set_task_vcpu(p, new_cpu);
 		task_rq_unlock(rq, &flags);
 		/* might preempt at this point */
 		rq = task_rq_lock(p, &flags);
@@ -1276,20 +2111,28 @@ out_set_cpu:
 		if (p->array)
 			goto out_running;
 
-		this_cpu = smp_processor_id();
-		cpu = task_cpu(p);
+		this_cpu = this_vcpu();
+		cpu = task_vcpu(p);
 	}
 
 out_activate:
 #endif /* CONFIG_SMP */
 	if (old_state == TASK_UNINTERRUPTIBLE) {
 		rq->nr_uninterruptible--;
+		nr_unint_dec(smp_processor_id(), task_cpu(p),
+				VE_TASK_INFO(p)->owner_env);
 		/*
 		 * Tasks on involuntary sleep don't earn
 		 * sleep_avg beyond just interactive state.
 		 */
 		p->activated = -1;
 	}
+	if (old_state == TASK_INTERRUPTIBLE)
+		nr_sleeping_dec(smp_processor_id(), task_cpu(p),
+				VE_TASK_INFO(p)->owner_env);
+	if (old_state == TASK_STOPPED)
+		nr_stopped_dec(smp_processor_id(), task_cpu(p),
+				VE_TASK_INFO(p)->owner_env);
 
 	/*
 	 * Sync wakeups (i.e. those types of wakeups where the waker
@@ -1328,11 +2171,38 @@ int fastcall wake_up_state(task_t *p, un
 }
 
 #ifdef CONFIG_SMP
-static int find_idlest_cpu(struct task_struct *p, int this_cpu,
+static vcpu_t find_idlest_cpu(struct task_struct *p, vcpu_t this_cpu,
 			   struct sched_domain *sd);
 #endif
 
 /*
+ * init is special, it is forked from swapper (idle_vsched) and should
+ * belong to default_vsched, so we have to change its vsched/fairsched manually
+ */
+void wake_up_init(task_t *p)
+{
+	runqueue_t *rq;
+	unsigned long flags;
+
+	/* we should change both fairsched node and vsched here */
+	set_task_vsched(p, &default_vsched);
+	set_task_cpu(p, 0);
+
+	/*
+	 * can't call wake_up_new_task() directly here,
+	 * since it assumes that a child belongs to the same vsched
+	 */
+	p->state = TASK_RUNNING;
+	p->sleep_avg = 0;
+	p->interactive_credit = 0;
+	p->prio = effective_prio(p);
+
+	rq = task_rq_lock(p, &flags);
+	__activate_task(p, rq);
+	task_rq_unlock(rq, &flags);
+}
+
+/*
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
  */
@@ -1374,6 +2244,7 @@ void fastcall sched_fork(task_t *p)
 	p->first_time_slice = current->pid;
 	current->time_slice >>= 1;
 	p->timestamp = sched_clock();
+	VE_TASK_INFO(p)->sleep_time -= get_cycles(); /*cosmetic: sleep till wakeup below*/
 	if (unlikely(!current->time_slice)) {
 		/*
 		 * This case is rare, it happens when the parent has only
@@ -1399,14 +2270,20 @@ void fastcall sched_fork(task_t *p)
 void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
 {
 	unsigned long flags;
-	int this_cpu, cpu;
+	vcpu_t this_cpu, cpu;
 	runqueue_t *rq, *this_rq;
 
+	if (unlikely(p->pid == 1)) {
+		wake_up_init(p);
+		return;
+	}
+
 	rq = task_rq_lock(p, &flags);
-	cpu = task_cpu(p);
-	this_cpu = smp_processor_id();
+	this_cpu = this_vcpu();
+	cpu = task_vcpu(p);
 
 	BUG_ON(p->state != TASK_RUNNING);
+	BUG_ON(task_vsched(current) != task_vsched(p));
 
 	schedstat_inc(rq, wunt_cnt);
 	/*
@@ -1437,6 +2314,8 @@ void fastcall wake_up_new_task(task_t * 
 				p->array = current->array;
 				p->array->nr_active++;
 				rq->nr_running++;
+				nr_running_inc(smp_processor_id(), task_cpu(p),
+						VE_TASK_INFO(p)->owner_env);
 			}
 			set_need_resched();
 		} else
@@ -1450,7 +2329,7 @@ void fastcall wake_up_new_task(task_t * 
 		 */
 		this_rq = rq;
 	} else {
-		this_rq = cpu_rq(this_cpu);
+		this_rq = vcpu_rq(this_cpu);
 
 		/*
 		 * Not the local CPU - must adjust timestamp. This should
@@ -1495,8 +2374,8 @@ void fastcall sched_exit(task_t * p)
 	 * the sleep_avg of the creator as well.
 	 */
 	if (p->first_time_slice) {
-		creator = find_task_by_pid((pid_t)p->first_time_slice);
-		if (creator && task_cpu(p) == task_cpu(creator)) {
+		creator = find_task_by_pid_all((pid_t)p->first_time_slice);
+		if (creator && task_vcpu(p) == task_vcpu(creator)) {
 			rq = task_rq_lock(creator, &flags);
 			creator->time_slice += p->time_slice;
 			if (unlikely(creator->time_slice > task_timeslice(p)))
@@ -1526,10 +2405,15 @@ void fastcall sched_exit(task_t * p)
  */
 static inline void finish_task_switch(task_t *prev)
 {
-	runqueue_t *rq = this_rq();
-	struct mm_struct *mm = rq->prev_mm;
+	runqueue_t *rq;
+	struct mm_struct *mm;
 	unsigned long prev_task_flags;
+	vcpu_t prev_vcpu, vcpu;
 
+	prev_vcpu = task_vcpu(prev);
+	vcpu = this_vcpu();
+	rq = vcpu_rq(vcpu);
+	mm = rq->prev_mm;
 	rq->prev_mm = NULL;
 
 	/*
@@ -1544,7 +2428,16 @@ static inline void finish_task_switch(ta
 	 *		Manfred Spraul <manfred@colorfullife.com>
 	 */
 	prev_task_flags = prev->flags;
+
+	/*
+	 * no schedule() should happen until vcpu_put,
+	 * and schedule_tail() calls us with preempt enabled...
+	 */
 	finish_arch_switch(rq, prev);
+	if (prev_vcpu != vcpu)
+		vcpu_put(prev_vcpu);
+	local_irq_enable();
+
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_task_flags & PF_DEAD)) {
@@ -1566,8 +2459,9 @@ asmlinkage void schedule_tail(task_t *pr
 	finish_task_switch(prev);
 
 	if (current->set_child_tid)
-		put_user(current->pid, current->set_child_tid);
+		put_user(virt_pid(current), current->set_child_tid);
 }
+EXPORT_SYMBOL(schedule_tail);
 
 /*
  * context_switch - switch to the new MM and the new
@@ -1607,55 +2501,120 @@ task_t * context_switch(runqueue_t *rq, 
  */
 unsigned long nr_running(void)
 {
-	unsigned long i, sum = 0;
-
-	for_each_online_cpu(i)
-		sum += cpu_rq(i)->nr_running;
+	int i;
+	long sum;
 
-	return sum;
+	sum = 0;
+	for (i = 0; i < NR_CPUS; i++)
+		sum += glob_tasks_nrs[i].nr_running;
+	return (unsigned long)(sum < 0 ? 0 : sum);
 }
+EXPORT_SYMBOL(nr_running);
 
 unsigned long nr_uninterruptible(void)
 {
-	unsigned long i, sum = 0;
+	int i;
+	long sum;
 
-	for_each_cpu(i)
-		sum += cpu_rq(i)->nr_uninterruptible;
+	sum = 0;
+	for (i = 0; i < NR_CPUS; i++)
+		sum += glob_tasks_nrs[i].nr_uninterruptible;
+	return (unsigned long)(sum < 0 ? 0 : sum);
+}
+EXPORT_SYMBOL(nr_uninterruptible);
 
-	/*
-	 * Since we read the counters lockless, it might be slightly
-	 * inaccurate. Do not allow it to go below zero though:
-	 */
-	if (unlikely((long)sum < 0))
-		sum = 0;
+unsigned long nr_sleeping(void)
+{
+	int i;
+	long sum;
 
-	return sum;
+	sum = 0;
+	for (i = 0; i < NR_CPUS; i++)
+		sum += glob_tasks_nrs[i].nr_sleeping;
+	return (unsigned long)(sum < 0 ? 0 : sum);
 }
+EXPORT_SYMBOL(nr_sleeping);
 
-unsigned long long nr_context_switches(void)
+unsigned long nr_stopped(void)
 {
-	unsigned long long i, sum = 0;
-
-	for_each_cpu(i)
-		sum += cpu_rq(i)->nr_switches;
+	int i;
+	long sum;
 
-	return sum;
+	sum = 0;
+	for (i = 0; i < NR_CPUS; i++)
+		sum += glob_tasks_nrs[i].nr_stopped;
+	return (unsigned long)(sum < 0 ? 0 : sum);
 }
+EXPORT_SYMBOL(nr_stopped);
 
 unsigned long nr_iowait(void)
 {
-	unsigned long i, sum = 0;
+	int i;
+	long sum;
+
+	sum = 0;
+	for (i = 0; i < NR_CPUS; i++)
+		sum += atomic_read(&glob_tasks_nrs[i].nr_iowait);
+	return (unsigned long)(sum < 0 ? 0 : sum);
+}
 
-	for_each_cpu(i)
-		sum += atomic_read(&cpu_rq(i)->nr_iowait);
+unsigned long long nr_context_switches(void)
+{
+	int i;
+	long long sum;
 
+	sum = 0;
+	for (i = 0; i < NR_CPUS; i++)
+		sum += glob_tasks_nrs[i].nr_switches;
 	return sum;
 }
 
+#ifdef CONFIG_VE
+unsigned long nr_running_ve(struct ve_struct *ve)
+{
+	int i;
+	long sum;
+
+	sum = 0;
+	for (i = 0; i < NR_CPUS; i++)
+		sum += VE_CPU_STATS(ve, i)->nr_running;
+	return (unsigned long)(sum < 0 ? 0 : sum);
+}
+
+unsigned long nr_uninterruptible_ve(struct ve_struct *ve)
+{
+	int i;
+	long sum;
+
+	sum = 0;
+	for (i = 0; i < NR_CPUS; i++)
+		sum += VE_CPU_STATS(ve, i)->nr_unint;
+	return (unsigned long)(sum < 0 ? 0 : sum);
+}
+
+unsigned long nr_iowait_ve(void)
+{
+	int i;
+	long sum;
+	struct vcpu_scheduler *vsched;
+
+	sum = 0;
+#ifdef CONFIG_SCHED_VCPU
+	vsched = this_vsched();
+	for_each_cpu_mask(i, vsched_vcpu_online_map(vsched)) {
+		struct runqueue *rq;
+		rq = vcpu_rq(vsched_vcpu(vsched, i));
+		sum += atomic_read(&rq->nr_iowait);
+	}
+#endif
+	return (unsigned long)(sum < 0 ? 0 : sum);
+}
+#endif
+
 /* cpus with isolated domains */
 cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE;
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
 
 /*
  * double_rq_lock - safely lock two runqueues
@@ -1668,10 +2627,11 @@ cpumask_t __devinitdata cpu_isolated_map
  */
 static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
 {
+	BUG_ON(!irqs_disabled());
 	if (rq1 == rq2)
 		spin_lock(&rq1->lock);
 	else {
-		if (rq1->cpu < rq2->cpu) {
+		if (rq1 < rq2) {
 			spin_lock(&rq1->lock);
 			spin_lock(&rq2->lock);
 		} else {
@@ -1694,13 +2654,19 @@ static void double_rq_unlock(runqueue_t 
 		spin_unlock(&rq2->lock);
 }
 
+#ifdef CONFIG_SMP
 /*
  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
  */
 static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
 {
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work well under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
 	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest->cpu < this_rq->cpu) {
+		if (busiest < this_rq) {
 			spin_unlock(&this_rq->lock);
 			spin_lock(&busiest->lock);
 			spin_lock(&this_rq->lock);
@@ -1712,23 +2678,34 @@ static void double_lock_balance(runqueue
 /*
  * find_idlest_cpu - find the least busy runqueue.
  */
-static int find_idlest_cpu(struct task_struct *p, int this_cpu,
+static vcpu_t find_idlest_cpu(struct task_struct *p, vcpu_t this_cpu,
 			   struct sched_domain *sd)
 {
 	unsigned long load, min_load, this_load;
-	int i, min_cpu;
-	cpumask_t mask;
+	int i;
+	vcpu_t min_cpu;
+	cpumask_t mask, vmask;
+	struct vcpu_scheduler *vsched;
 
-	min_cpu = UINT_MAX;
+	vsched = task_vsched(p);
+	min_cpu = NULL;
 	min_load = ULONG_MAX;
 
-	cpus_and(mask, sd->span, p->cpus_allowed);
+	cpus_and(mask, sd->span, cpu_online_map);
+	cpus_and(vmask, vsched_vcpu_online_map(vsched), p->cpus_allowed);
+
+	for_each_cpu_mask(i, vmask) {
+		vcpu_t vcpu;
+		vcpu = vsched_vcpu(vsched, i);
 
-	for_each_cpu_mask(i, mask) {
-		load = target_load(i);
+		if (!cpu_isset(vcpu_last_pcpu(vcpu), mask))
+			continue;
+		if (vcpu_is_offline(vcpu))
+			continue;
 
+		load = target_load(vcpu);
 		if (load < min_load) {
-			min_cpu = i;
+			min_cpu = vcpu;
 			min_load = load;
 
 			/* break out early on an idle CPU: */
@@ -1737,6 +2714,9 @@ static int find_idlest_cpu(struct task_s
 		}
 	}
 
+	if (min_cpu == NULL)
+		return this_cpu;
+
 	/* add +1 to account for the new task */
 	this_load = source_load(this_cpu) + SCHED_LOAD_SCALE;
 
@@ -1753,6 +2733,7 @@ static int find_idlest_cpu(struct task_s
 
 	return this_cpu;
 }
+#endif
 
 /*
  * If dest_cpu is allowed for this process, migrate the task to it.
@@ -1760,15 +2741,15 @@ static int find_idlest_cpu(struct task_s
  * allow dest_cpu, which will force the cpu onto dest_cpu.  Then
  * the cpu_allowed mask is restored.
  */
-static void sched_migrate_task(task_t *p, int dest_cpu)
+static void sched_migrate_task(task_t *p, vcpu_t dest_cpu)
 {
 	migration_req_t req;
 	runqueue_t *rq;
 	unsigned long flags;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpu_isset(dest_cpu, p->cpus_allowed)
-	    || unlikely(cpu_is_offline(dest_cpu)))
+	if (unlikely(!vcpu_isset(dest_cpu, p->cpus_allowed)
+	    || vcpu_is_offline(dest_cpu)))
 		goto out;
 
 	schedstat_inc(rq, smt_cnt);
@@ -1786,7 +2767,9 @@ static void sched_migrate_task(task_t *p
 out:
 	task_rq_unlock(rq, &flags);
 }
+#endif
 
+#ifdef CONFIG_SMP
 /*
  * sched_exec(): find the highest-level, exec-balance-capable
  * domain and try to migrate the task to the least loaded CPU.
@@ -1797,11 +2780,11 @@ out:
 void sched_exec(void)
 {
 	struct sched_domain *tmp, *sd = NULL;
-	int new_cpu, this_cpu = get_cpu();
+	vcpu_t new_cpu, this_cpu = get_vcpu();
 
 	schedstat_inc(this_rq(), sbe_cnt);
 	/* Prefer the current CPU if there's only this task running */
-	if (this_rq()->nr_running <= 1)
+	if (vcpu_rq(this_cpu)->nr_running <= 1)
 		goto out;
 
 	for_each_domain(this_cpu, tmp)
@@ -1819,7 +2802,7 @@ void sched_exec(void)
 		}
 	}
 out:
-	put_cpu();
+	put_vcpu();
 }
 
 /*
@@ -1828,11 +2811,25 @@ out:
  */
 static inline
 void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
-	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
+	       runqueue_t *this_rq, prio_array_t *this_array, vcpu_t this_cpu)
 {
+	struct ve_struct *ve;
+	cycles_t cycles;
+
+	cycles = get_cycles();
+	ve = VE_TASK_INFO(p)->owner_env;
+
 	dequeue_task(p, src_array);
 	src_rq->nr_running--;
-	set_task_cpu(p, this_cpu);
+	if (src_rq->nr_running == 0) {
+		vcpu_detach(src_rq);
+		vcpu_strt_ve_idle(ve, task_cpu(p), cycles);
+	}
+	set_task_vcpu(p, this_cpu);
+	if (this_rq->nr_running == 0) {
+		vcpu_save_ve_idle(ve, this_cpu, cycles);
+		vcpu_attach(this_rq);
+	}
 	this_rq->nr_running++;
 	enqueue_task(p, this_array);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
@@ -1849,7 +2846,7 @@ void pull_task(runqueue_t *src_rq, prio_
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
 static inline
-int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
+int can_migrate_task(task_t *p, runqueue_t *rq, vcpu_t this_cpu,
 		     struct sched_domain *sd, enum idle_type idle, int *all_pinned)
 {
 	/*
@@ -1860,7 +2857,7 @@ int can_migrate_task(task_t *p, runqueue
 	 */
 	if (task_running(rq, p))
 		return 0;
-	if (!cpu_isset(this_cpu, p->cpus_allowed))
+	if (!vcpu_isset(this_cpu, p->cpus_allowed))
 		return 0;
 
 	*all_pinned = 0;
@@ -1882,7 +2879,7 @@ int can_migrate_task(task_t *p, runqueue
  *
  * Called with both runqueues locked.
  */
-static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
+static int move_tasks(runqueue_t *this_rq, vcpu_t this_cpu, runqueue_t *busiest,
 		      unsigned long max_nr_move, struct sched_domain *sd,
 		      enum idle_type idle, int *all_pinned)
 {
@@ -1891,6 +2888,8 @@ static int move_tasks(runqueue_t *this_r
 	int idx, pulled = 0, pinned = 0;
 	task_t *tmp;
 
+	if (vcpu_is_offline(this_cpu))
+		goto out;
 	if (max_nr_move <= 0 || busiest->nr_running <= 1)
 		goto out;
 
@@ -1972,12 +2971,18 @@ out:
  * moved to restore balance via the imbalance parameter.
  */
 static struct sched_group *
-find_busiest_group(struct sched_domain *sd, int this_cpu,
+find_busiest_group(struct sched_domain *sd, vcpu_t this_cpu,
 		   unsigned long *imbalance, enum idle_type idle)
 {
 	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	cpumask_t tmp;
+	int this_pcpu;
 
+	vsched = vcpu_vsched(this_cpu);
+	this_pcpu = vcpu_last_pcpu(this_cpu);
 	max_load = this_load = total_load = total_pwr = 0;
 
 	do {
@@ -1985,17 +2990,19 @@ find_busiest_group(struct sched_domain *
 		int local_group;
 		int i, nr_cpus = 0;
 
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpu_isset(this_pcpu, group->cpumask);
 
 		/* Tally up the load of all CPUs in the group */
 		avg_load = 0;
+		cpus_and(tmp, group->cpumask, vsched_pcpu_running_map(vsched));
 
-		for_each_cpu_mask(i, group->cpumask) {
+		for_each_cpu_mask(i, tmp) {
+			vcpu = pcpu(i)->vcpu;
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = target_load(i);
+				load = target_load(vcpu);
 			else
-				load = source_load(i);
+				load = source_load(vcpu);
 
 			nr_cpus++;
 			avg_load += load;
@@ -2024,6 +3031,8 @@ nextgroup:
 
 	if (!busiest || this_load >= max_load)
 		goto out_balanced;
+	if (!this)
+		this = busiest; /* this->cpu_power is needed below */
 
 	avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
 
@@ -2101,21 +3110,33 @@ out_balanced:
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
-static runqueue_t *find_busiest_queue(struct sched_group *group)
+static vcpu_t find_busiest_queue(vcpu_t this_cpu,
+		struct sched_group *group, enum idle_type idle)
 {
+	cpumask_t tmp;
 	unsigned long load, max_load = 0;
-	runqueue_t *busiest = NULL;
+	vcpu_t vcpu;
+	struct vcpu_scheduler *vsched;
+	vcpu_t busiest = NULL;
 	int i;
 
-	for_each_cpu_mask(i, group->cpumask) {
-		load = source_load(i);
+	vsched = vcpu_vsched(this_cpu);
+	cpus_and(tmp, group->cpumask, cpu_online_map);
+
+	for_each_cpu_mask(i, vsched_vcpu_online_map(vsched)) {
+		vcpu = vsched_vcpu(vsched, i);
+		if (!cpu_isset(vcpu_last_pcpu(vcpu), tmp))
+			continue;
+
+		if (vcpu_rq(vcpu)->nr_running == 1)
+			continue;
 
+		load = source_load(vcpu);
 		if (load > max_load) {
 			max_load = load;
-			busiest = cpu_rq(i);
+			busiest = vcpu;
 		}
 	}
-
 	return busiest;
 }
 
@@ -2125,10 +3146,11 @@ static runqueue_t *find_busiest_queue(st
  *
  * Called with this_rq unlocked.
  */
-static int load_balance(int this_cpu, runqueue_t *this_rq,
+static int load_balance(vcpu_t this_cpu, runqueue_t *this_rq,
 			struct sched_domain *sd, enum idle_type idle)
 {
 	struct sched_group *group;
+	vcpu_t busiest_vcpu;
 	runqueue_t *busiest;
 	unsigned long imbalance;
 	int nr_moved, all_pinned=0;
@@ -2136,27 +3158,40 @@ static int load_balance(int this_cpu, ru
 	spin_lock(&this_rq->lock);
 	schedstat_inc(sd, lb_cnt[idle]);
 
+#ifdef CONFIG_SCHED_VCPU
+	if (likely(vcpu_vsched(this_cpu) == &idle_vsched)) {
+		/*
+		 * Find idle vcpu to balance to
+		 */
+		this_cpu = find_idle_target();
+		if (!this_cpu)
+			goto out_balanced;
+		spin_unlock(&this_rq->lock);
+		this_rq = vcpu_rq(this_cpu);
+		spin_lock(&this_rq->lock);
+	}
+#endif
 	group = find_busiest_group(sd, this_cpu, &imbalance, idle);
 	if (!group) {
 		schedstat_inc(sd, lb_nobusyg[idle]);
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group);
-	if (!busiest) {
+	busiest_vcpu = find_busiest_queue(this_cpu, group, idle);
+	if (!busiest_vcpu) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
 	}
 
+	busiest = vcpu_rq(busiest_vcpu);
+
 	/*
 	 * This should be "impossible", but since load
 	 * balancing is inherently racy and statistical,
 	 * it could happen in theory.
 	 */
-	if (unlikely(busiest == this_rq)) {
-		WARN_ON(1);
+	if (unlikely(busiest == this_rq))
 		goto out_balanced;
-	}
 
 	schedstat_add(sd, lb_imbalance[idle], imbalance);
 
@@ -2230,11 +3265,12 @@ out_balanced:
  * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
  * this_rq is locked.
  */
-static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
+static int load_balance_newidle(vcpu_t this_cpu, runqueue_t *this_rq,
 				struct sched_domain *sd)
 {
 	struct sched_group *group;
-	runqueue_t *busiest = NULL;
+	vcpu_t busiest_vcpu;
+	runqueue_t *busiest;
 	unsigned long imbalance;
 	int nr_moved = 0;
 
@@ -2245,11 +3281,12 @@ static int load_balance_newidle(int this
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group);
-	if (!busiest || busiest == this_rq) {
+	busiest_vcpu = find_busiest_queue(this_cpu, group, NEWLY_IDLE);
+	if (!busiest_vcpu || busiest_vcpu == this_cpu) {
 		schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
 		goto out_balanced;
 	}
+	busiest = vcpu_rq(busiest_vcpu);
 
 	/* Attempt to move tasks */
 	double_lock_balance(this_rq, busiest);
@@ -2274,19 +3311,22 @@ out_balanced:
 /*
  * idle_balance is called by schedule() if this_cpu is about to become
  * idle. Attempts to pull tasks from other CPUs.
+ *
+ * Returns whether to continue with another runqueue
+ * instead of switching to idle.
  */
-static inline void idle_balance(int this_cpu, runqueue_t *this_rq)
+static int idle_balance(vcpu_t this_cpu, runqueue_t *this_rq)
 {
 	struct sched_domain *sd;
 
 	for_each_domain(this_cpu, sd) {
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
-			if (load_balance_newidle(this_cpu, this_rq, sd)) {
+			if (load_balance_newidle(this_cpu, this_rq, sd))
 				/* We've pulled tasks over so stop searching */
-				break;
-			}
+				return 1;
 		}
 	}
+	return 0;
 }
 
 /*
@@ -2296,18 +3336,28 @@ static inline void idle_balance(int this
  * logical imbalance.
  *
  * Called with busiest locked.
+ *
+ * In human terms: balancing of CPU load by moving tasks between CPUs is
+ * performed by 2 methods, push and pull.
+ * In certain places when CPU is found to be idle, it performs pull from busy
+ * CPU to current (idle) CPU.
+ * active_load_balance implements push method, with migration thread getting
+ * scheduled on a busy CPU (hence, making all running processes on this CPU sit
+ * in the queue) and selecting where to push and which task.
  */
-static void active_load_balance(runqueue_t *busiest, int busiest_cpu)
+static void active_load_balance(runqueue_t *busiest, vcpu_t busiest_cpu)
 {
 	struct sched_domain *sd;
 	runqueue_t *target_rq;
-	int target_cpu = busiest->push_cpu;
+	vcpu_t target_cpu = busiest->push_cpu;
+	struct vcpu_scheduler *vsched;
+	int i;
 
 	schedstat_inc(busiest, alb_cnt);
 	if (busiest->nr_running <= 1)
 		return;
 
-	target_rq = cpu_rq(target_cpu);
+	target_rq = vcpu_rq(target_cpu);
 
 	/*
 	 * This condition is "impossible", but since load
@@ -2320,11 +3370,13 @@ static void active_load_balance(runqueue
 	double_lock_balance(busiest, target_rq);
 
 	for_each_domain(target_cpu, sd)
-		if (cpu_isset(busiest_cpu, sd->span))
+		if (cpu_isset(vcpu_last_pcpu(busiest_cpu), sd->span))
 				break;
 
-	if (unlikely(sd == NULL))
-		goto out;
+	if (unlikely(sd == NULL)) {
+		spin_unlock(&target_rq->lock);
+		return;
+	}
 
 	if (move_tasks(target_rq, target_cpu, busiest, 1, sd, IDLE, NULL)) {
 		schedstat_inc(busiest, alb_lost);
@@ -2332,8 +3384,30 @@ static void active_load_balance(runqueue
 	} else {
 		schedstat_inc(busiest, alb_failed);
 	}
-out:
 	spin_unlock(&target_rq->lock);
+
+#ifdef CONFIG_SCHED_VCPU
+	vsched = vcpu_vsched(busiest_cpu);
+ 	if (busiest->nr_running > 2) { /* 1 for migration thread, 1 for task */
+		cpumask_t tmp;
+		runqueue_t *rq;
+		vcpu_t vcpu;
+
+		cpus_andnot(tmp, vsched->vcpu_online_map,
+					vsched->vcpu_running_map);
+		for_each_cpu_mask(i, tmp) {
+			vcpu = vsched_vcpu(vsched, i);
+			if (!idle_vcpu(vcpu))
+				continue;
+			rq = vcpu_rq(vcpu);
+			double_lock_balance(busiest, rq);
+			move_tasks(rq, vcpu, busiest, 1, sd, IDLE, NULL);
+			spin_unlock(&rq->lock);
+			if (busiest->nr_running <= 2)
+				break;
+		}
+	}
+#endif
 }
 
 /*
@@ -2346,15 +3420,14 @@ out:
  */
 
 /* Don't have all balancing operations going off at once */
-#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS)
+#define CPU_OFFSET(cpu) (HZ * (cpu) / NR_CPUS)
 
 #define IRQ_QUANTUM	(HZ / 10)
 
-static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
+static void rebalance_tick(vcpu_t this_cpu, runqueue_t *this_rq,
 			   enum idle_type idle)
 {
-	unsigned long old_load, this_load;
-	unsigned long j = jiffies + CPU_OFFSET(this_cpu);
+	unsigned long j;
 	struct sched_domain *sd;
 
 	if (unlikely(--this_rq->irq_quantum <= 0)) {
@@ -2369,23 +3442,15 @@ static void rebalance_tick(int this_cpu,
 	}
 
 	/* Update our load */
-	old_load = this_rq->cpu_load;
-	this_load = current_load(this_rq);
-	/*
-	 * Round up the averaging division if load is increasing. This
-	 * prevents us from getting stuck on 9 if the load is 10, for
-	 * example.
-	 */
-	if (this_load > old_load)
-		old_load++;
-	this_rq->cpu_load = (old_load + this_load) / 2;
+	update_rq_cpu_load(this_rq);
 
 	/*
 	 * Isolated cpus don't get load-balanced.
 	 */
-	if (cpu_isset(this_cpu, cpu_isolated_map))
+	if (vcpu_isset(this_cpu, cpu_isolated_map))
 		return;
 
+	j = jiffies + CPU_OFFSET(smp_processor_id());
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval = sd->balance_interval;
 
@@ -2396,7 +3461,6 @@ static void rebalance_tick(int this_cpu,
 		interval = msecs_to_jiffies(interval);
 		if (unlikely(!interval))
 			interval = 1;
-
 		if (j - sd->last_balance >= interval) {
 			if (load_balance(this_cpu, this_rq, sd, idle)) {
 				/* We've pulled tasks over so no longer idle */
@@ -2410,17 +3474,19 @@ static void rebalance_tick(int this_cpu,
 /*
  * on UP we do not need to balance between CPUs:
  */
-static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle)
+static inline void rebalance_tick(vcpu_t cpu, runqueue_t *rq, enum idle_type idle)
 {
 }
-static inline void idle_balance(int cpu, runqueue_t *rq)
+static inline int idle_balance(vcpu_t cpu, struct rq *rq)
 {
 }
 #endif
 
-static inline int wake_priority_sleeper(runqueue_t *rq)
+static inline int wake_priority_sleeper(runqueue_t *rq, task_t *idle)
 {
 	int ret = 0;
+#ifndef CONFIG_SCHED_VCPU
+	/* FIXME: can we implement SMT priority sleeping for this? */
 #ifdef CONFIG_SCHED_SMT
 	spin_lock(&rq->lock);
 	/*
@@ -2428,11 +3494,12 @@ static inline int wake_priority_sleeper(
 	 * reasons reschedule the idle task to see if it can now run.
 	 */
 	if (rq->nr_running) {
-		resched_task(rq->idle);
+		resched_task(idle);
 		ret = 1;
 	}
 	spin_unlock(&rq->lock);
 #endif
+#endif
 	return ret;
 }
 
@@ -2455,6 +3522,25 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 	  (jiffies - (rq)->switch_timestamp > STARVATION_LIMIT)) || \
 	 ((rq)->curr->static_prio > (rq)->best_expired_prio))
 
+#ifdef CONFIG_VE
+#define update_ve_nice(p, tick)		do {			\
+		VE_CPU_STATS(VE_TASK_INFO(p)->owner_env,	\
+			task_cpu(p))->nice += tick;		\
+	} while (0)
+#define update_ve_user(p, tick)		do {			\
+		VE_CPU_STATS(VE_TASK_INFO(p)->owner_env,	\
+			task_cpu(p))->user += tick;		\
+	} while (0)
+#define update_ve_system(p, tick)	do {			\
+		VE_CPU_STATS(VE_TASK_INFO(p)->owner_env,	\
+			task_cpu(p))->system += tick;		\
+	} while (0)
+#else
+#define update_ve_nice(p, tick)		do { } while (0)
+#define update_ve_user(p, tick)		do { } while (0)
+#define update_ve_system(p, tick)	do { } while (0)
+#endif
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
@@ -2465,12 +3551,17 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 void scheduler_tick(int user_ticks, int sys_ticks)
 {
 	int cpu = smp_processor_id();
+	vcpu_t vcpu;
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	runqueue_t *rq = this_rq();
+	runqueue_t *rq;
 	task_t *p = current;
 
+	vcpu = this_vcpu();
+	rq = vcpu_rq(vcpu);
 	rq->timestamp_last_tick = sched_clock();
 
+	set_tsk_need_resched(p); //FIXME
+
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_ticks);
 
@@ -2482,22 +3573,25 @@ void scheduler_tick(int user_ticks, int 
 		cpustat->softirq += sys_ticks;
 		sys_ticks = 0;
 	}
-
-	if (p == rq->idle) {
-		if (atomic_read(&rq->nr_iowait) > 0)
+	if (p == pcpu(cpu)->idle) {
+		if (atomic_read(&glob_tasks_nrs[cpu].nr_iowait) > 0)
 			cpustat->iowait += sys_ticks;
 		else
 			cpustat->idle += sys_ticks;
-		if (wake_priority_sleeper(rq))
+		if (wake_priority_sleeper(rq, pcpu(cpu)->idle))
 			goto out;
-		rebalance_tick(cpu, rq, IDLE);
+		rebalance_tick(vcpu, rq, IDLE);
 		return;
 	}
-	if (TASK_NICE(p) > 0)
+	if (TASK_NICE(p) > 0) {
 		cpustat->nice += user_ticks;
-	else
+		update_ve_nice(p, user_ticks);
+	} else {
 		cpustat->user += user_ticks;
+		update_ve_user(p, user_ticks);
+	}
 	cpustat->system += sys_ticks;
+	update_ve_system(p, sys_ticks);
 
 	/* Task might have expired already, but not scheduled off yet */
 	if (p->array != rq->active) {
@@ -2558,9 +3652,12 @@ void scheduler_tick(int user_ticks, int 
 		 * This only applies to tasks in the interactive
 		 * delta range with at least TIMESLICE_GRANULARITY to requeue.
 		 */
+		unsigned long ts_gran;
+
+		ts_gran = TIMESLICE_GRANULARITY(p);
 		if (TASK_INTERACTIVE(p) && !((task_timeslice(p) -
-			p->time_slice) % TIMESLICE_GRANULARITY(p)) &&
-			(p->time_slice >= TIMESLICE_GRANULARITY(p)) &&
+			p->time_slice) % ts_gran) &&
+			(p->time_slice >= ts_gran) &&
 			(p->array == rq->active)) {
 
 			dequeue_task(p, rq->active);
@@ -2572,18 +3669,22 @@ void scheduler_tick(int user_ticks, int 
 out_unlock:
 	spin_unlock(&rq->lock);
 out:
-	rebalance_tick(cpu, rq, NOT_IDLE);
+	rebalance_tick(vcpu, rq, NOT_IDLE);
 }
 
-#ifdef CONFIG_SCHED_SMT
-static inline void wakeup_busy_runqueue(runqueue_t *rq)
+#if defined(CONFIG_SCHED_SMT) && !defined(CONFIG_SCHED_VCPU)
+/* FIXME: SMT scheduling */
+static inline void wakeup_busy_runqueue(int pcpu_num)
 {
+	runqueue_t *smt_rq;
+	smt_rq = vcpu_rq(vcpu(pcpu_num));
+
 	/* If an SMT runqueue is sleeping due to priority reasons wake it up */
-	if (rq->curr == rq->idle && rq->nr_running)
-		resched_task(rq->idle);
+	if (smt_rq->curr == pcpu(pcpu_num)->idle && smt_rq->nr_running)
+		resched_task(pcpu(pcpu_num)->idle);
 }
 
-static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+static void wake_sleeping_dependent(vcpu_t this_cpu, runqueue_t *this_rq)
 {
 	struct sched_domain *sd = this_rq->sd;
 	cpumask_t sibling_map;
@@ -2609,11 +3710,8 @@ static inline void wake_sleeping_depende
 	 */
 	cpu_clear(this_cpu, sibling_map);
 
-	for_each_cpu_mask(i, sibling_map) {
-		runqueue_t *smt_rq = cpu_rq(i);
-
-		wakeup_busy_runqueue(smt_rq);
-	}
+	for_each_cpu_mask(i, sibling_map)
+		wakeup_busy_runqueue(i);
 
 	for_each_cpu_mask(i, sibling_map)
 		spin_unlock(&cpu_rq(i)->lock);
@@ -2623,7 +3721,7 @@ static inline void wake_sleeping_depende
 	 */
 }
 
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+static int dependent_sleeper(vcpu_t cpu, runqueue_t *rq)
 {
 	struct sched_domain *sd = this_rq->sd;
 	cpumask_t sibling_map;
@@ -2641,7 +3739,7 @@ static inline int dependent_sleeper(int 
 	spin_unlock(&this_rq->lock);
 	sibling_map = sd->span;
 	for_each_cpu_mask(i, sibling_map)
-		spin_lock(&cpu_rq(i)->lock);
+		spin_lock(&vcpu_rq(vcpu(i))->lock);
 	cpu_clear(this_cpu, sibling_map);
 
 	/*
@@ -2688,11 +3786,11 @@ static inline int dependent_sleeper(int 
 					ret = 1;
 
 check_smt_task:
-		if ((!smt_curr->mm && smt_curr != smt_rq->idle) ||
+		if ((!smt_curr->mm && smt_curr != pcpu(i)->idle) ||
 			rt_task(smt_curr))
 				continue;
 		if (!p->mm) {
-			wakeup_busy_runqueue(smt_rq);
+			wakeup_busy_runqueue(i);
 			continue;
 		}
 
@@ -2710,7 +3808,7 @@ check_smt_task:
 				task_timeslice(smt_curr))
 					resched_task(smt_curr);
 			else
-				wakeup_busy_runqueue(smt_rq);
+				wakeup_busy_runqueue(i);
 		}
 	}
 out_unlock:
@@ -2719,16 +3817,34 @@ out_unlock:
 	return ret;
 }
 #else
-static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+static inline void wake_sleeping_dependent(vcpu_t this_cpu, runqueue_t *this_rq)
 {
 }
 
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+static inline int dependent_sleeper(vcpu_t this_cpu, runqueue_t *this_rq)
 {
 	return 0;
 }
 #endif
 
+static void update_sched_lat(struct task_struct *t, cycles_t cycles)
+{
+	int cpu;
+	cycles_t ve_wstamp;
+
+	/* safe due to runqueue lock */
+	ve_wstamp = VE_TASK_INFO(t)->wakeup_stamp;
+	cpu = smp_processor_id();
+	if (ve_wstamp && cycles > ve_wstamp) {
+		KSTAT_LAT_PCPU_ADD(&kstat_glob.sched_lat,
+				cpu, cycles - ve_wstamp);
+#ifdef CONFIG_VE
+		KSTAT_LAT_PCPU_ADD(&VE_TASK_INFO(t)->exec_env->sched_lat_ve,
+				cpu, cycles - ve_wstamp);
+#endif
+	}
+}
+
 /*
  * schedule() is the main scheduler function.
  */
@@ -2741,7 +3857,9 @@ asmlinkage void __sched schedule(void)
 	struct list_head *queue;
 	unsigned long long now;
 	unsigned long run_time;
-	int cpu, idx;
+	int idx;
+	vcpu_t vcpu;
+	cycles_t cycles;
 
 	//WARN_ON(system_state == SYSTEM_BOOTING);
 	/*
@@ -2755,8 +3873,8 @@ asmlinkage void __sched schedule(void)
 			dump_stack();
 		}
 	}
-
 need_resched:
+	cycles = get_cycles();
 	preempt_disable();
 	prev = current;
 	rq = this_rq();
@@ -2765,7 +3883,7 @@ need_resched:
 	 * The idle thread is not allowed to schedule!
 	 * Remove this check after it has been exercised a bit.
 	 */
-	if (unlikely(current == rq->idle) && current->state != TASK_RUNNING) {
+	if (unlikely(current == this_pcpu()->idle) && current->state != TASK_RUNNING) {
 		printk(KERN_ERR "bad: scheduling from the idle thread!\n");
 		dump_stack();
 	}
@@ -2773,9 +3891,11 @@ need_resched:
 	release_kernel_lock(prev);
 	schedstat_inc(rq, sched_cnt);
 	now = sched_clock();
-	if (likely(now - prev->timestamp < NS_MAX_SLEEP_AVG))
+	if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) {
 		run_time = now - prev->timestamp;
-	else
+		if (unlikely((long long)(now - prev->timestamp) < 0))
+			run_time = 0;
+	} else
 		run_time = NS_MAX_SLEEP_AVG;
 
 	/*
@@ -2797,8 +3917,10 @@ need_resched:
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		switch_count = &prev->nvcsw;
-		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
-				unlikely(signal_pending(prev))))
+		if (unlikely(((prev->state & TASK_INTERRUPTIBLE) &&
+				unlikely(signal_pending(prev))) ||
+			     ((prev->state & TASK_STOPPED) &&
+				sigismember(&prev->pending.signal, SIGKILL))))
 			prev->state = TASK_RUNNING;
 		else {
 			if (prev->state == TASK_UNINTERRUPTIBLE)
@@ -2807,26 +3929,39 @@ need_resched:
 		}
 	}
 
-	cpu = smp_processor_id();
+	prev->sleep_avg -= run_time;
+	if ((long)prev->sleep_avg <= 0) {
+		prev->sleep_avg = 0;
+		if (!(HIGH_CREDIT(prev) || LOW_CREDIT(prev)))
+			prev->interactive_credit--;
+	}
+
+	vcpu = rq_vcpu(rq);
+	if (rq->nr_running &&
+	    jiffies - vcpu->start_time < msecs_to_jiffies(vcpu_timeslice))
+		goto same_vcpu;
+
+	if (unlikely(!rq->nr_running))
+		idle_balance(vcpu, rq);
+	vcpu = schedule_vcpu(vcpu, cycles);
+	rq = vcpu_rq(vcpu);
+
 	if (unlikely(!rq->nr_running)) {
 go_idle:
-		idle_balance(cpu, rq);
-		if (!rq->nr_running) {
-			next = rq->idle;
-			rq->switch_timestamp = jiffies;
-			wake_sleeping_dependent(cpu, rq);
-			/*
-			 * wake_sleeping_dependent() might have released
-			 * the runqueue, so break out if we got new
-			 * tasks meanwhile:
-			 */
-			if (!rq->nr_running)
-				goto switch_tasks;
-		}
+		next = this_pcpu()->idle;
+		rq->switch_timestamp = jiffies;
+		wake_sleeping_dependent(vcpu, rq);
+		/*
+		 * wake_sleeping_dependent() might have released
+		 * the runqueue, so break out if we got new
+		 * tasks meanwhile:
+		 */
+		if (!rq->nr_running)
+			goto switch_tasks;
 	} else {
-		if (dependent_sleeper(cpu, rq)) {
+		if (dependent_sleeper(vcpu, rq)) {
+			next = this_pcpu()->idle;
 			schedstat_inc(rq, sched_goidle);
-			next = rq->idle;
 			goto switch_tasks;
 		}
 		/*
@@ -2838,6 +3973,7 @@ go_idle:
 			goto go_idle;
 	}
 
+same_vcpu:
 	array = rq->active;
 	if (unlikely(!array->nr_active)) {
 		/*
@@ -2858,6 +3994,8 @@ go_idle:
 
 	if (!rt_task(next) && next->activated > 0) {
 		unsigned long long delta = now - next->timestamp;
+		if (unlikely((long long)delta < 0))
+			delta = 0;
 
 		if (next->activated == 1)
 			delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
@@ -2871,30 +4009,63 @@ go_idle:
 switch_tasks:
 	prefetch(next);
 	clear_tsk_need_resched(prev);
-	rcu_qsctr_inc(task_cpu(prev));
+	rcu_qsctr_inc(task_pcpu(prev));
 
-	prev->sleep_avg -= run_time;
-	if ((long)prev->sleep_avg <= 0) {
-		prev->sleep_avg = 0;
-		if (!(HIGH_CREDIT(prev) || LOW_CREDIT(prev)))
-			prev->interactive_credit--;
-	}
+	/* updated w/o rq->lock, which is ok due to after-read-checks */
 	prev->timestamp = prev->last_ran = now;
 
 	sched_info_switch(prev, next);
 	if (likely(prev != next)) {
+		/* current physical CPU id should be valid after switch */
+		set_task_vcpu(next, vcpu);
+		set_task_pcpu(next, task_pcpu(prev));
+
 		next->timestamp = now;
 		rq->nr_switches++;
+		glob_tasks_nrs[smp_processor_id()].nr_switches++;
 		rq->curr = next;
 		++*switch_count;
 
+		VE_TASK_INFO(prev)->sleep_stamp = cycles;
+		if (prev->state == TASK_RUNNING && prev != this_pcpu()->idle)
+			write_wakeup_stamp(prev, cycles);
+		update_sched_lat(next, cycles);
+
+		/* because next & prev are protected with
+		 * runqueue lock we may not worry about
+		 * wakeup_stamp and sched_time protection
+		 * (same thing in 'else' branch below)
+		 */
+		if (prev != this_pcpu()->idle) {
+#ifdef CONFIG_VE
+			VE_CPU_STATS(VE_TASK_INFO(prev)->owner_env,
+					smp_processor_id())->used_time +=
+				cycles - VE_TASK_INFO(prev)->sched_time;
+#endif
+			VE_TASK_INFO(prev)->sched_time = 0;
+		}
+		VE_TASK_INFO(next)->sched_time = cycles;
+		write_wakeup_stamp(next, 0);
+
 		prepare_arch_switch(rq, next);
 		prev = context_switch(rq, prev, next);
 		barrier();
 
 		finish_task_switch(prev);
-	} else
+	} else {
+		if (prev != this_pcpu()->idle) {
+			cycles_t cycles;
+
+			cycles = get_cycles();
+#ifdef CONFIG_VE
+			VE_CPU_STATS(VE_TASK_INFO(prev)->owner_env,
+					smp_processor_id())->used_time +=
+				cycles - VE_TASK_INFO(prev)->sched_time;
+#endif
+			VE_TASK_INFO(prev)->sched_time = cycles;
+		}
 		spin_unlock_irq(&rq->lock);
+	}
 
 	reacquire_kernel_lock(current);
 	preempt_enable_no_resched();
@@ -3186,6 +4357,9 @@ EXPORT_SYMBOL(wait_for_completion_interr
 	__remove_wait_queue(q, &wait);			\
 	spin_unlock_irqrestore(&q->lock, flags);
 
+#ifdef CONFIG_VE
+#define SLEEP_ON_BKLCHECK
+#else
 #define SLEEP_ON_BKLCHECK				\
 	if (unlikely(!kernel_locked()) &&		\
 	    sleep_on_bkl_warnings < 10) {		\
@@ -3194,6 +4368,7 @@ EXPORT_SYMBOL(wait_for_completion_interr
 	}
 
 static int sleep_on_bkl_warnings;
+#endif
 
 void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)
 {
@@ -3360,32 +4535,12 @@ int task_nice(const task_t *p)
 }
 
 /**
- * idle_cpu - is a given cpu idle currently?
- * @cpu: the processor in question.
- */
-int idle_cpu(int cpu)
-{
-	return cpu_curr(cpu) == cpu_rq(cpu)->idle;
-}
-
-EXPORT_SYMBOL_GPL(idle_cpu);
-
-/**
- * idle_task - return the idle task for a given cpu.
- * @cpu: the processor in question.
- */
-task_t *idle_task(int cpu)
-{
-       return cpu_rq(cpu)->idle;
-}
-
-/**
  * find_process_by_pid - find a process with a matching PID value.
  * @pid: the pid in question.
  */
 static inline task_t *find_process_by_pid(pid_t pid)
 {
-	return pid ? find_task_by_pid(pid) : current;
+	return pid ? find_task_by_pid_ve(pid) : current;
 }
 
 /* Actually do priority change: must hold rq lock. */
@@ -3403,7 +4558,7 @@ static void __setscheduler(struct task_s
 /*
  * setscheduler - change the scheduling policy and/or RT priority of a thread.
  */
-static int setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+int setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 {
 	struct sched_param lp;
 	int retval = -EINVAL;
@@ -3454,7 +4609,7 @@ recheck:
 
 	retval = -EPERM;
 	if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
-	    !capable(CAP_SYS_NICE))
+	    !capable(CAP_SYS_ADMIN))
 		goto out_unlock;
 	if ((current->euid != p->euid) && (current->euid != p->uid) &&
 	    !capable(CAP_SYS_NICE))
@@ -3500,6 +4655,7 @@ out_unlock:
 out_nounlock:
 	return retval;
 }
+EXPORT_SYMBOL(setscheduler);
 
 /**
  * sys_sched_setscheduler - set/change the scheduler policy and RT priority
@@ -3822,9 +4978,13 @@ EXPORT_SYMBOL(yield);
 void __sched io_schedule(void)
 {
 	struct runqueue *rq = this_rq();
+	int cpu;
 
+	cpu = smp_processor_id();
 	atomic_inc(&rq->nr_iowait);
+	nr_iowait_inc(cpu);
 	schedule();
+	nr_iowait_dec(cpu);
 	atomic_dec(&rq->nr_iowait);
 }
 
@@ -3834,9 +4994,13 @@ long __sched io_schedule_timeout(long ti
 {
 	struct runqueue *rq = this_rq();
 	long ret;
+	int cpu;
 
+	cpu = smp_processor_id();
 	atomic_inc(&rq->nr_iowait);
+	nr_iowait_inc(cpu);
 	ret = schedule_timeout(timeout);
+	nr_iowait_dec(cpu);
 	atomic_dec(&rq->nr_iowait);
 	return ret;
 }
@@ -3956,16 +5120,13 @@ static void show_task(task_t * p)
 		printk(stat_nam[state]);
 	else
 		printk("?");
+	if (state)
+		printk(" %012Lx", (unsigned long long)
+			(VE_TASK_INFO(p)->sleep_stamp >> 16));
 #if (BITS_PER_LONG == 32)
-	if (state == TASK_RUNNING)
-		printk(" running ");
-	else
-		printk(" %08lX ", thread_saved_pc(p));
+	printk(" %08lX ", (unsigned long)p);
 #else
-	if (state == TASK_RUNNING)
-		printk("  running task   ");
-	else
-		printk(" %016lx ", thread_saved_pc(p));
+	printk(" %016lx ", (unsigned long)p);
 #endif
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	{
@@ -3993,8 +5154,7 @@ static void show_task(task_t * p)
 	else
 		printk(" (NOTLB)\n");
 
-	if (state != TASK_RUNNING)
-		show_stack(p, NULL);
+	show_stack(p, NULL);
 }
 
 void show_state(void)
@@ -4004,42 +5164,82 @@ void show_state(void)
 #if (BITS_PER_LONG == 32)
 	printk("\n"
 	       "                                               sibling\n");
-	printk("  task             PC      pid father child younger older\n");
+	printk("  task       taskaddr      pid father child younger older\n");
 #else
 	printk("\n"
 	       "                                                       sibling\n");
-	printk("  task                 PC          pid father child younger older\n");
+	printk("  task           taskaddr          pid father child younger older\n");
 #endif
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		/*
 		 * reset the NMI-timeout, listing all files on a slow
 		 * console might take alot of time:
 		 */
 		touch_nmi_watchdog();
 		show_task(p);
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 	read_unlock(&tasklist_lock);
 }
 
 EXPORT_SYMBOL_GPL(show_state);
 
+static void init_rq(struct runqueue *rq);
+
+static void init_vcpu(vcpu_t vcpu, int id)
+{
+	memset(vcpu, 0, sizeof(struct vcpu_info));
+	vcpu->id = id;
+#ifdef CONFIG_SCHED_VCPU
+	vcpu->last_pcpu = id;
+#endif
+	init_rq(vcpu_rq(vcpu));
+}
+
 void __devinit init_idle(task_t *idle, int cpu)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	runqueue_t *rq;
 	unsigned long flags;
 
+#ifdef CONFIG_SCHED_VCPU
+	if (cpu > 0 && __add_vcpu(&idle_vsched, cpu) != 0)
+		panic("Can't create idle vcpu %d\n", cpu);
+
+	cpu_set(cpu, idle_vsched.pcpu_running_map);
+#endif
+	vsched = &idle_vsched;
+	vcpu = vsched_vcpu(vsched, cpu);
+	rq = vcpu_rq(vcpu);
+
 	idle->sleep_avg = 0;
 	idle->interactive_credit = 0;
 	idle->array = NULL;
 	idle->prio = MAX_PRIO;
 	idle->state = TASK_RUNNING;
+	set_task_vsched(idle, &idle_vsched);
 	set_task_cpu(idle, cpu);
 
 	spin_lock_irqsave(&rq->lock, flags);
-	rq->curr = rq->idle = idle;
+	pcpu(cpu)->idle = idle;
+	rq->curr = idle;
 	set_tsk_need_resched(idle);
+	set_task_pcpu(idle, cpu);
+	set_task_vsched(idle, vsched);
+	set_task_vcpu(idle, vcpu);
+#ifdef CONFIG_SCHED_VCPU
+	/* the following code is very close to vcpu_get */
+	spin_lock(&fairsched_lock);
+	pcpu(cpu)->vcpu = vcpu;
+	pcpu(cpu)->vsched = vcpu->vsched;
+	list_move_tail(&vcpu->list, &vsched->running_list);
+	__set_bit(cpu, vsched->vcpu_running_map.bits);
+	__set_bit(cpu, vsched->pcpu_running_map.bits);
+	vcpu->running = 1;
+	spin_unlock(&fairsched_lock);
+#endif
 	spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
@@ -4059,7 +5259,7 @@ void __devinit init_idle(task_t *idle, i
  */
 cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
 /*
  * This is how migration works:
  *
@@ -4085,15 +5285,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
  * task must not exit() & deallocate itself prematurely.  The
  * call is not atomic; no spinlocks may be held.
  */
+#ifdef CONFIG_SMP
 int set_cpus_allowed(task_t *p, cpumask_t new_mask)
 {
 	unsigned long flags;
 	int ret = 0;
 	migration_req_t req;
 	runqueue_t *rq;
+	struct vcpu_scheduler *vsched;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpus_intersects(new_mask, cpu_online_map)) {
+	vsched = task_vsched(p);
+	if (!cpus_intersects(new_mask, vsched_vcpu_online_map(vsched))) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -4103,7 +5306,8 @@ int set_cpus_allowed(task_t *p, cpumask_
 	if (cpu_isset(task_cpu(p), new_mask))
 		goto out;
 
-	if (migrate_task(p, any_online_cpu(new_mask), &req)) {
+	if (migrate_task(p, vsched_vcpu(vsched, any_online_cpu(new_mask)),
+								&req)) {
 		/* Need help from migration thread: drop lock and wait. */
 		task_rq_unlock(rq, &flags);
 		wake_up_process(rq->migration_thread);
@@ -4117,6 +5321,7 @@ out:
 }
 
 EXPORT_SYMBOL_GPL(set_cpus_allowed);
+#endif
 
 /*
  * Move (not current) task off this cpu, onto dest cpu.  We're doing
@@ -4127,25 +5332,31 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
  * So we race with normal scheduler movements, but that's OK, as long
  * as the task is no longer on this CPU.
  */
-static void __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+static int __migrate_task(struct task_struct *p, vcpu_t src_cpu, vcpu_t dest_cpu)
 {
 	runqueue_t *rq_dest, *rq_src;
+	int res = 0;
 
-	if (unlikely(cpu_is_offline(dest_cpu)))
-		return;
+	if (unlikely(vcpu_is_offline(dest_cpu)))
+		return 0;
 
-	rq_src = cpu_rq(src_cpu);
-	rq_dest = cpu_rq(dest_cpu);
+#ifdef CONFIG_SCHED_VCPU
+	BUG_ON(vcpu_vsched(src_cpu) == &idle_vsched);
+#endif
+	rq_src = vcpu_rq(src_cpu);
+	rq_dest = vcpu_rq(dest_cpu);
 
 	double_rq_lock(rq_src, rq_dest);
 	/* Already moved. */
-	if (task_cpu(p) != src_cpu)
+	if (task_vcpu(p) != src_cpu)
 		goto out;
 	/* Affinity changed (again). */
-	if (!cpu_isset(dest_cpu, p->cpus_allowed))
+	if (!vcpu_isset(dest_cpu, p->cpus_allowed))
 		goto out;
 
-	set_task_cpu(p, dest_cpu);
+	BUG_ON(task_running(rq_src, p));
+	set_task_vsched(p, vcpu_vsched(dest_cpu));
+	set_task_vcpu(p, dest_cpu);
 	if (p->array) {
 		/*
 		 * Sync timestamp with rq_dest's before activating.
@@ -4160,9 +5371,11 @@ static void __migrate_task(struct task_s
 		if (TASK_PREEMPTS_CURR(p, rq_dest))
 			resched_task(rq_dest->curr);
 	}
+	res = 1;
 
 out:
 	double_rq_unlock(rq_src, rq_dest);
+	return res;
 }
 
 /*
@@ -4170,12 +5383,14 @@ out:
  * thread migration by bumping thread off CPU then 'pushing' onto
  * another runqueue.
  */
+static void migrate_live_tasks(vcpu_t src_cpu);
+static void migrate_dead_tasks(vcpu_t dead_cpu);
 static int migration_thread(void * data)
 {
 	runqueue_t *rq;
-	int cpu = (long)data;
+	vcpu_t cpu = (vcpu_t)data;
 
-	rq = cpu_rq(cpu);
+	rq = vcpu_rq(cpu);
 	BUG_ON(rq->migration_thread != current);
 
 	set_current_state(TASK_INTERRUPTIBLE);
@@ -4183,21 +5398,21 @@ static int migration_thread(void * data)
 		struct list_head *head;
 		migration_req_t *req;
 
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
 
 		spin_lock_irq(&rq->lock);
 
-		if (cpu_is_offline(cpu)) {
+		if (vcpu_is_offline(cpu)) {
 			spin_unlock_irq(&rq->lock);
 			goto wait_to_die;
 		}
-
+#ifdef CONFIG_SMP
 		if (rq->active_balance) {
 			active_load_balance(rq, cpu);
 			rq->active_balance = 0;
 		}
-
+#endif
 		head = &rq->migration_queue;
 
 		if (list_empty(head)) {
@@ -4211,12 +5426,14 @@ static int migration_thread(void * data)
 
 		if (req->type == REQ_MOVE_TASK) {
 			spin_unlock(&rq->lock);
-			__migrate_task(req->task, smp_processor_id(),
+			__migrate_task(req->task, this_vcpu(),
 					req->dest_cpu);
 			local_irq_enable();
+#ifdef CONFIG_SMP
 		} else if (req->type == REQ_SET_DOMAIN) {
 			rq->sd = req->sd;
 			spin_unlock_irq(&rq->lock);
+#endif
 		} else {
 			spin_unlock_irq(&rq->lock);
 			WARN_ON(1);
@@ -4224,8 +5441,7 @@ static int migration_thread(void * data)
 
 		complete(&req->done);
 	}
-	__set_current_state(TASK_RUNNING);
-	return 0;
+	goto die;
 
 wait_to_die:
 	/* Wait for kthread_stop */
@@ -4234,18 +5450,31 @@ wait_to_die:
 		schedule();
 		set_current_state(TASK_INTERRUPTIBLE);
 	}
+die:
 	__set_current_state(TASK_RUNNING);
+	migrate_live_tasks(cpu);
+	spin_lock_irq(&rq->lock);
+	migrate_dead_tasks(cpu);
+	spin_unlock_irq(&rq->lock);
 	return 0;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-/* Figure out where task on dead CPU should go, use force if neccessary. */
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk)
+/*
+ * Figure out where task on dead CPU should go, use force if necessary.
+ * NOTE: interrupts should be disabled by the caller
+ */
+static void move_task_off_dead_cpu(vcpu_t dead_cpu, struct task_struct *tsk)
 {
 	int dest_cpu;
+	struct vcpu_scheduler *vsched;
 	cpumask_t mask;
+	runqueue_t *rq;
+	unsigned long flags;
 
+restart:
+#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_SCHED_VCPU)
 	/* On same node? */
+#error "FIXME: wrong code in case of HOTPLUG_CPU"
 	mask = node_to_cpumask(cpu_to_node(dead_cpu));
 	cpus_and(mask, mask, tsk->cpus_allowed);
 	dest_cpu = any_online_cpu(mask);
@@ -4270,8 +5499,25 @@ static void move_task_off_dead_cpu(int d
 			       tsk->pid, tsk->comm, dead_cpu);
 	}
 	__migrate_task(tsk, dead_cpu, dest_cpu);
+#elif defined(CONFIG_SCHED_VCPU)
+	vsched = vcpu_vsched(dead_cpu);
+	cpus_and(mask, vsched_vcpu_online_map(vsched), tsk->cpus_allowed);
+	dest_cpu = any_online_cpu(mask);
+
+	/* On any allowed CPU? */
+	if (dest_cpu == NR_CPUS) {
+		rq = task_rq_lock(tsk, &flags);
+		cpus_setall(tsk->cpus_allowed);
+		task_rq_unlock(rq, &flags);
+		dest_cpu = any_online_cpu(vsched_vcpu_online_map(vsched));
+	}
+	BUG_ON(dest_cpu == NR_CPUS);
+#endif
+	if (!__migrate_task(tsk, dead_cpu, vsched_vcpu(vsched, dest_cpu)))
+		goto restart;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * While a dead CPU has no uninterruptible tasks queued at this point,
  * it might still have a nonzero ->nr_uninterruptible counter, because
@@ -4291,25 +5537,28 @@ static void migrate_nr_uninterruptible(r
 	double_rq_unlock(rq_src, rq_dest);
 	local_irq_restore(flags);
 }
+#endif
 
 /* Run through task list and migrate tasks from the dead cpu. */
-static void migrate_live_tasks(int src_cpu)
+static void migrate_live_tasks(vcpu_t src_cpu)
 {
 	struct task_struct *tsk, *t;
 
+	BUG_ON(vcpu_isset(src_cpu, vsched_vcpu_online_map(vcpu_vsched(src_cpu))));
 	write_lock_irq(&tasklist_lock);
 
-	do_each_thread(t, tsk) {
-		if (tsk == current)
+	do_each_thread_all(t, tsk) {
+		if (tsk == current || tsk == vcpu_rq(src_cpu)->migration_thread)
 			continue;
 
-		if (task_cpu(tsk) == src_cpu)
+		if (task_vcpu(tsk) == src_cpu)
 			move_task_off_dead_cpu(src_cpu, tsk);
-	} while_each_thread(t, tsk);
+	} while_each_thread_all(t, tsk);
 
 	write_unlock_irq(&tasklist_lock);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /* Schedules idle task to be the next runnable task on current CPU.
  * It does so by boosting its priority to highest possible and adding it to
  * the _front_ of runqueue. Used by CPU offline code.
@@ -4331,6 +5580,9 @@ void sched_idle_next(void)
 
 	__setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
 	/* Add idle task to _front_ of it's priority queue */
+#ifdef CONFIG_SCHED_VCPU
+#error "FIXME: VCPU vs. HOTPLUG: fix the code below"
+#endif
 	__activate_idle_task(p, rq);
 
 	spin_unlock_irqrestore(&rq->lock, flags);
@@ -4349,10 +5601,13 @@ void idle_task_exit(void)
 		switch_mm(mm, &init_mm, current);
 	mmdrop(mm);
 }
+#endif /* CONFIG_HOTPLUG_CPU */
 
-static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_SCHED_VCPU)
+/* called under rq->lock with disabled interrupts */
+static void migrate_dead(vcpu_t dead_cpu, task_t *tsk)
 {
-	struct runqueue *rq = cpu_rq(dead_cpu);
+	struct runqueue *rq = vcpu_rq(dead_cpu);
 
 	/* Must be exiting, otherwise would be on tasklist. */
 	BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD);
@@ -4366,82 +5621,123 @@ static void migrate_dead(unsigned int de
 	 * Drop lock around migration; if someone else moves it,
 	 * that's OK.  No task can be added to this CPU, so iteration is
 	 * fine.
+	 * NOTE: interrupts should be left disabled  --dev@
 	 */
-	spin_unlock_irq(&rq->lock);
+	spin_unlock(&rq->lock);
 	move_task_off_dead_cpu(dead_cpu, tsk);
-	spin_lock_irq(&rq->lock);
+	spin_lock(&rq->lock);
 
 	put_task_struct(tsk);
 }
 
 /* release_task() removes task from tasklist, so we won't find dead tasks. */
-static void migrate_dead_tasks(unsigned int dead_cpu)
+static void migrate_dead_tasks(vcpu_t dead_cpu)
 {
 	unsigned arr, i;
-	struct runqueue *rq = cpu_rq(dead_cpu);
+	struct runqueue *rq = vcpu_rq(dead_cpu);
 
 	for (arr = 0; arr < 2; arr++) {
 		for (i = 0; i < MAX_PRIO; i++) {
 			struct list_head *list = &rq->arrays[arr].queue[i];
-			while (!list_empty(list))
-				migrate_dead(dead_cpu,
-					     list_entry(list->next, task_t,
-							run_list));
+			task_t *tsk;
+restart:
+			list_for_each_entry(tsk, list, run_list) {
+				if (tsk == rq->migration_thread)
+					continue;
+				migrate_dead(dead_cpu, tsk);
+				goto restart;
+			}
 		}
 	}
 }
-#endif /* CONFIG_HOTPLUG_CPU */
+#endif /* CONFIG_HOTPLUG_CPU || SCHED_VCPU */
+
+static void migration_thread_bind(struct task_struct *k, vcpu_t cpu)
+{
+	BUG_ON(k->state != TASK_INTERRUPTIBLE);
+	/* Must have done schedule() in kthread() before we set_task_cpu */
+	wait_task_inactive(k);
+
+	set_task_vsched(k, vcpu_vsched(cpu));
+	set_task_vcpu(k, cpu);
+	k->cpus_allowed = cpumask_of_cpu(cpu->id);
+}
+
+static void migration_thread_stop(runqueue_t *rq)
+{
+	struct task_struct *thread;
+
+	thread = rq->migration_thread;
+	if (thread == NULL)
+		return;
+
+	get_task_struct(thread);
+	kthread_stop(thread);
+
+	/* We MUST ensure, that the do_exit of the migration thread is
+	 * completed and it will never scheduled again before vsched_destroy.
+	 * The task with flag PF_DEAD if unscheduled will never receive
+	 * CPU again. */
+	while (!(thread->flags & PF_DEAD) || task_running(rq, thread))
+		yield();
+	put_task_struct(thread);
+
+	rq->migration_thread = NULL;
+}
 
 /*
  * migration_call - callback that gets triggered when a CPU is added.
  * Here we can start up the necessary migration thread for the new CPU.
  */
-static int migration_call(struct notifier_block *nfb, unsigned long action,
+static int vmigration_call(struct notifier_block *nfb, unsigned long action,
 			  void *hcpu)
 {
-	int cpu = (long)hcpu;
+	vcpu_t cpu = (vcpu_t)hcpu;
 	struct task_struct *p;
 	struct runqueue *rq;
 	unsigned long flags;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-		p = kthread_create(migration_thread, hcpu, "migration/%d",cpu);
+		p = kthread_create(migration_thread, hcpu, "migration/%d/%d", 
+			vsched_id(vcpu_vsched(cpu)), cpu->id);
 		if (IS_ERR(p))
 			return NOTIFY_BAD;
 		p->flags |= PF_NOFREEZE;
-		kthread_bind(p, cpu);
-		/* Must be high prio: stop_machine expects to yield to it. */
+
+		migration_thread_bind(p, cpu);
 		rq = task_rq_lock(p, &flags);
+		/* Must be high prio: stop_machine expects to yield to it. */
 		__setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
 		task_rq_unlock(rq, &flags);
-		cpu_rq(cpu)->migration_thread = p;
+		vcpu_rq(cpu)->migration_thread = p;
+		cpu_set(cpu->id, vcpu_vsched(cpu)->vcpu_online_map);
 		break;
 	case CPU_ONLINE:
 		/* Strictly unneccessary, as first user will wake it. */
-		wake_up_process(cpu_rq(cpu)->migration_thread);
+		wake_up_process(vcpu_rq(cpu)->migration_thread);
 		break;
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_SCHED_VCPU)
+#error "FIXME: CPU down code doesn't work yet with VCPUs"
+#endif
 	case CPU_UP_CANCELED:
 		/* Unbind it from offline cpu so it can run.  Fall thru. */
-		kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id());
-		kthread_stop(cpu_rq(cpu)->migration_thread);
-		cpu_rq(cpu)->migration_thread = NULL;
+		migration_thread_bind(vcpu_rq(cpu)->migration_thread, this_vcpu());
+		migration_thread_stop(vcpu_rq(cpu));
 		break;
 	case CPU_DEAD:
-		migrate_live_tasks(cpu);
-		rq = cpu_rq(cpu);
-		kthread_stop(rq->migration_thread);
-		rq->migration_thread = NULL;
+		rq = vcpu_rq(cpu);
+		migration_thread_stop(rq);
+#ifdef CONFIG_HOTPLUG_CPU
 		/* Idle task back to normal (off runqueue, low prio) */
 		rq = task_rq_lock(rq->idle, &flags);
 		deactivate_task(rq->idle, rq);
 		rq->idle->static_prio = MAX_PRIO;
 		__setscheduler(rq->idle, SCHED_NORMAL, 0);
-		migrate_dead_tasks(cpu);
 		task_rq_unlock(rq, &flags);
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
+#endif
 
 		/* No need to migrate the tasks: it was best-effort if
 		 * they didn't do lock_cpu_hotplug().  Just wake up
@@ -4457,11 +5753,17 @@ static int migration_call(struct notifie
 		}
 		spin_unlock_irq(&rq->lock);
 		break;
-#endif
 	}
 	return NOTIFY_OK;
 }
 
+static int migration_call(struct notifier_block *nfb, unsigned long action,
+			  void *hcpu)
+{
+	/* we need to translate pcpu to vcpu */
+	return vmigration_call(nfb, action, vsched_default_vcpu((long)hcpu));
+}
+
 /* Register at highest priority so that task migration (migrate_all_tasks)
  * happens before everything else.
  */
@@ -4505,7 +5807,7 @@ static void cpu_attach_domain(struct sch
 {
 	migration_req_t req;
 	unsigned long flags;
-	runqueue_t *rq = cpu_rq(cpu);
+	runqueue_t *rq = vcpu_rq(vsched_default_vcpu(cpu));
 	int local = 1;
 	struct sched_domain *tmp = sd, *tmp1;
 
@@ -4525,6 +5827,7 @@ static void cpu_attach_domain(struct sch
 
 	spin_lock_irqsave(&rq->lock, flags);
 
+	pcpu(cpu)->sd = sd;
 	if (cpu == smp_processor_id() || !cpu_online(cpu)) {
 		rq->sd = sd;
 	} else {
@@ -4955,11 +6258,10 @@ void sched_domain_debug(void)
 	int i;
 
 	for_each_online_cpu(i) {
-		runqueue_t *rq = cpu_rq(i);
 		struct sched_domain *sd;
 		int level = 0;
 
-		sd = rq->sd;
+		sd = pcpu(i)->sd;
 
 		printk(KERN_DEBUG "CPU%d:\n", i);
 
@@ -4975,7 +6277,8 @@ void sched_domain_debug(void)
 			printk(KERN_DEBUG);
 			for (j = 0; j < level + 1; j++)
 				printk(" ");
-			printk("domain %d: span %s\n", level, str);
+			printk("domain %d: span %s flags 0x%x\n",
+					level, str, sd->flags);
 
 			if (!cpu_isset(i, sd->span))
 				printk(KERN_DEBUG "ERROR domain->span does not contain CPU%d\n", i);
@@ -5089,6 +6392,49 @@ void __init sched_init_smp(void)
 }
 #endif /* CONFIG_SMP */
 
+#ifdef CONFIG_VE
+/*
+ * This function is used to show fake CPU information.
+ *
+ * I'm still quite unsure that faking CPU speed is such a good idea,
+ * but someone (Kirill?) has made this decision.
+ * What I'm absolutely sure is that it's a part of virtualization,
+ * not a scheduler.  20050727 SAW
+ */
+#ifdef CONFIG_FAIRSCHED
+int scale_vcpu_frequency = 1;
+EXPORT_SYMBOL(scale_vcpu_frequency);
+
+unsigned long ve_scale_khz(unsigned long khz)
+{
+	struct fairsched_node *node;
+	int cpus;
+	unsigned long rate;
+
+	if (!scale_vcpu_frequency)
+		return khz;
+	rate = fairsched_nr_cpus << FSCHRATE_SHIFT;
+
+	/*
+	 * Ideally fairsched node should be taken from the current ve_struct.
+	 * However, to simplify the code and locking, it is taken from current
+	 * (currently fairsched_node can be changed only for a sleeping task).
+	 * That means that VE0 processes moved to some special node will get
+	 * fake CPU speed, but that shouldn't be a big problem.
+	 */
+	preempt_disable();
+	node = current->vsched->node;
+	cpus = node->vcpus;
+	if (node->rate_limited)
+		rate = node->rate;
+	preempt_enable();
+
+	return min((unsigned long long)khz,
+		((unsigned long long)khz * (rate / cpus)) >> FSCHRATE_SHIFT);
+}
+#endif
+#endif /* CONFIG_VE */
+
 int in_sched_functions(unsigned long addr)
 {
 	/* Linker adds these: start and end of __sched functions */
@@ -5098,14 +6444,10 @@ int in_sched_functions(unsigned long add
 		&& addr < (unsigned long)__sched_text_end);
 }
 
-void __init sched_init(void)
-{
-	runqueue_t *rq;
-	int i, j, k;
-
 #ifdef CONFIG_SMP
+static void init_sd(void)
+{
 	/* Set up an initial dummy domain for early boot */
-
 	memset(&sched_domain_dummy, 0, sizeof(struct sched_domain));
 	sched_domain_dummy.span = CPU_MASK_ALL;
 	sched_domain_dummy.groups = &sched_group_dummy;
@@ -5117,42 +6459,636 @@ void __init sched_init(void)
 	sched_group_dummy.cpumask = CPU_MASK_ALL;
 	sched_group_dummy.next = &sched_group_dummy;
 	sched_group_dummy.cpu_power = SCHED_LOAD_SCALE;
+}
+#else
+static void inline init_sd(void)
+{
+}
 #endif
 
-	for (i = 0; i < NR_CPUS; i++) {
-		prio_array_t *array;
+static void init_rq(struct runqueue *rq)
+{
+	int j, k;
+	prio_array_t *array;
 
-		rq = cpu_rq(i);
-		spin_lock_init(&rq->lock);
-		rq->active = rq->arrays;
-		rq->expired = rq->arrays + 1;
-		rq->best_expired_prio = MAX_PRIO;
+	spin_lock_init(&rq->lock);
+	rq->active = rq->arrays;
+	rq->expired = rq->arrays + 1;
+	rq->best_expired_prio = MAX_PRIO;
 
 #ifdef CONFIG_SMP
-		rq->sd = &sched_domain_dummy;
-		rq->cpu_load = 0;
-		rq->active_balance = 0;
-		rq->push_cpu = 0;
-		rq->migration_thread = NULL;
-		INIT_LIST_HEAD(&rq->migration_queue);
-		rq->cpu = i;
-		rq->irq_pct = 0;
-		rq->irq_quantum = IRQ_QUANTUM;
-		rq->prev_irq_ticks = 0;
-#endif
-		atomic_set(&rq->nr_iowait, 0);
-
-		for (j = 0; j < 2; j++) {
-			array = rq->arrays + j;
-			for (k = 0; k < MAX_PRIO; k++) {
-				INIT_LIST_HEAD(array->queue + k);
-				__clear_bit(k, array->bitmap);
-			}
-			// delimiter for bitsearch
-			__set_bit(MAX_PRIO, array->bitmap);
+	rq->sd = &sched_domain_dummy;
+	rq->cpu_load = 0;
+	rq->active_balance = 0;
+	rq->irq_pct = 0;
+	rq->irq_quantum = IRQ_QUANTUM;
+	rq->prev_irq_ticks = 0;
+#endif
+	rq->push_cpu = 0;
+	rq->migration_thread = NULL;
+	INIT_LIST_HEAD(&rq->migration_queue);
+	atomic_set(&rq->nr_iowait, 0);
+
+	for (j = 0; j < 2; j++) {
+		array = rq->arrays + j;
+		for (k = 0; k < MAX_PRIO; k++) {
+			INIT_LIST_HEAD(array->queue + k);
+			__clear_bit(k, array->bitmap);
+		}
+		// delimiter for bitsearch
+		__set_bit(MAX_PRIO, array->bitmap);
+	}
+}
+
+#if defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED)
+/* both rq and vsched lock should be taken */
+static void __install_vcpu(struct vcpu_scheduler *vsched, vcpu_t vcpu)
+{
+	int id;
+
+	id = vcpu->id;
+	vcpu->vsched = vsched;
+	vsched->vcpu[id] = vcpu;
+	vcpu->last_pcpu = id;
+	wmb();
+	/* FIXME: probably locking should be reworked, e.g.
+	   we don't have corresponding rmb(), so we need to update mask
+	   only after quiescent state */
+	/* init_boot_vcpu() should be remade if RCU is used here */
+	list_add(&vcpu->list, &vsched->idle_list);
+	vsched->num_online_vcpus++;
+}
+
+static int install_vcpu(vcpu_t vcpu, struct vcpu_scheduler *vsched)
+{
+	runqueue_t *rq;
+	unsigned long flags;
+	int res = 0;
+
+	rq = vcpu_rq(vcpu);
+	spin_lock_irqsave(&rq->lock, flags);
+	spin_lock(&fairsched_lock);
+
+	if (vsched->vcpu[vcpu->id] != NULL)
+		res = -EBUSY;
+	else
+		__install_vcpu(vsched, vcpu);
+
+	spin_unlock(&fairsched_lock);
+	spin_unlock_irqrestore(&rq->lock, flags);
+	return res;
+}
+
+static int __add_vcpu(struct vcpu_scheduler *vsched, int id)
+{
+	vcpu_t vcpu;
+	int res;
+
+	res = -ENOMEM;
+	vcpu = kmalloc(sizeof(struct vcpu_info), GFP_KERNEL);
+	if (vcpu == NULL)
+		goto out;
+
+	init_vcpu(vcpu, id);
+	vcpu_rq(vcpu)->curr = this_pcpu()->idle;
+	res = install_vcpu(vcpu, vsched);
+	if (res < 0)
+		goto out_free;
+#ifdef CONFIG_VE
+	memset(VE_CPU_STATS(vsched->node->owner_env, id), 0,
+			sizeof(struct ve_cpu_stats));
+	/* Kick idle time collecting logic */
+	vcpu_strt_ve_idle(vsched->node->owner_env, id, get_cycles());
+#endif
+	return 0;
+
+out_free:
+	kfree(vcpu);
+out:
+	return res;
+}
+
+void vsched_init(struct vcpu_scheduler *vsched, int id)
+{
+	memset(vsched, 0, sizeof(*vsched));
+
+	INIT_LIST_HEAD(&vsched->idle_list);
+	INIT_LIST_HEAD(&vsched->active_list);
+	INIT_LIST_HEAD(&vsched->running_list);
+	vsched->num_online_vcpus = 0;
+	vsched->vcpu_online_map = CPU_MASK_NONE;
+	vsched->vcpu_running_map = CPU_MASK_NONE;
+	vsched->pcpu_running_map = CPU_MASK_NONE;
+	vsched->id = id;
+}
+
+#ifdef CONFIG_FAIRSCHED
+
+/* No locks supposed to be held */
+static void vsched_del_vcpu(vcpu_t vcpu, int empty);
+static int vsched_add_vcpu(struct vcpu_scheduler *vsched)
+{
+	int res, err;
+	vcpu_t vcpu;
+	int id;
+	static DECLARE_MUTEX(id_mutex);
+
+	down(&id_mutex);
+	id = find_first_zero_bit(vsched->vcpu_online_map.bits, NR_CPUS);
+	if (id >= NR_CPUS) {
+		err = -EBUSY;
+		goto out_up;
+	}
+
+	err = __add_vcpu(vsched, id);
+	if (err < 0)
+		goto out_up;
+
+	vcpu = vsched_vcpu(vsched, id);
+	err = -ENOMEM;
+
+	res = vmigration_call(&migration_notifier, CPU_UP_PREPARE, vcpu);
+	if (res != NOTIFY_OK)
+		goto out_del_up;
+
+	res = vmigration_call(&migration_notifier, CPU_ONLINE, vcpu);
+	if (res != NOTIFY_OK)
+		goto out_cancel_del_up;
+
+	err = 0;
+
+out_up:
+	up(&id_mutex);
+	return err;
+
+out_cancel_del_up:
+	vmigration_call(&migration_notifier, CPU_UP_CANCELED, vcpu);
+out_del_up:
+	vsched_del_vcpu(vcpu, 0);
+	goto out_up;
+}
+
+static inline void offline_vcpu(struct vcpu_scheduler *vsched, int cpu,
+		runqueue_t *rq)
+{
+	spin_lock_irq(&rq->lock);
+	spin_lock(&fairsched_lock);
+	cpu_clear(cpu, vsched->vcpu_online_map);
+	vsched->num_online_vcpus--;
+	spin_unlock(&fairsched_lock);
+	spin_unlock_irq(&rq->lock);
+}
+
+/* Move stat from dead vcpu to any online vcpu */
+void move_vcpu_stat(vcpu_t src_vcpu)
+{
+	struct vcpu_scheduler *vsched;
+	struct ve_cpu_stats *src_stat;
+	struct ve_cpu_stats *dst_stat;
+	struct rq *rq_src;
+	struct rq *rq_dst;
+	unsigned long flags;
+	cpumask_t mask;
+	int dst_cpu;
+
+	if (src_vcpu->id == 0)
+		return;
+
+	rq_src = vcpu_rq(src_vcpu);
+	vsched = vcpu_vsched(src_vcpu);
+	cpus_and(mask, vsched_vcpu_online_map(vsched), CPU_MASK_ALL);
+
+	dst_cpu = any_online_cpu(mask);
+	if (dst_cpu == NR_CPUS)
+		return;
+
+	rq_dst = vcpu_rq(vsched_vcpu(vsched, dst_cpu));
+
+	local_irq_save(flags);
+	double_rq_lock(rq_src, rq_dst);
+
+	src_stat = VE_CPU_STATS(vsched->node->owner_env, src_vcpu->id);
+	dst_stat = VE_CPU_STATS(vsched->node->owner_env, dst_cpu);
+	dst_stat->nr_running += src_stat->nr_running;
+	dst_stat->nr_unint += src_stat->nr_unint;
+	src_stat->nr_running = 0;
+	src_stat->nr_unint = 0;
+
+	double_rq_unlock(rq_src, rq_dst);
+	local_irq_restore(flags);
+}
+
+static inline void del_vcpu(struct vcpu_scheduler *vsched, int cpu,
+		vcpu_t vcpu)
+{
+	spin_lock_irq(&fairsched_lock);
+	list_del(&vcpu->list);
+	vsched_vcpu(vsched, cpu) = NULL;
+	spin_unlock_irq(&fairsched_lock);
+
+	move_vcpu_stat(vcpu);
+	kfree(vcpu);
+}
+
+static void vsched_del_vcpu(vcpu_t vcpu, int empty)
+{
+	struct vcpu_scheduler *vsched;
+	runqueue_t *rq;
+
+	vsched = vcpu_vsched(vcpu);
+	rq = vcpu_rq(vcpu);
+
+	offline_vcpu(vsched, vcpu->id, rq);
+
+	/* no need to synchronize if there are no tasks at all */
+	if (!empty)
+		synchronize_kernel();
+
+	/*
+	 * all tasks should migrate from this VCPU somewhere,
+	 * also, since this moment VCPU is offline, so migration_thread
+	 * won't accept any new tasks...
+	 */
+	vmigration_call(&migration_notifier, CPU_DEAD, vcpu);
+	BUG_ON(rq->nr_running != 0);
+
+	/* vcpu_put() is called after deactivate_task. This loop makes sure
+	 * that vcpu_put() has finished and the vcpu can be freed */
+	while ((volatile int)vcpu->running)
+		yield();
+
+	BUG_ON(vcpu->active);	/* should be in idle_list */
+
+	del_vcpu(vsched, vcpu->id, vcpu);
+}
+
+void fini_idle(int cpu)
+{
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	runqueue_t *rq;
+
+	vsched = &idle_vsched;
+	vcpu = vsched_vcpu(vsched, cpu);
+	rq = vcpu_rq(vcpu);
+	offline_vcpu(vsched, vcpu->id, rq);
+	del_vcpu(vsched, vcpu->id, vcpu);
+}
+
+int vsched_set_vcpus(struct vcpu_scheduler *vsched, unsigned int vcpus)
+{
+	int i, ret = 0;
+	vcpu_t vcpu;
+
+	if (vsched->num_online_vcpus < vcpus) {
+		/* need to add more VCPUs */
+		for (i = vcpus - vsched->num_online_vcpus; i > 0; i--) {
+			ret = vsched_add_vcpu(vsched);
+			if (ret < 0)
+				break;
+		}
+	} else if (vsched->num_online_vcpus > vcpus) {
+		/* remove some VCPUs */
+		while (vcpus != vsched->num_online_vcpus) {
+			vcpu = vsched_vcpu(vsched, vsched->num_online_vcpus - 1);
+			BUG_ON(!vcpu);
+			vsched_del_vcpu(vcpu, 0);
 		}
 	}
+#ifdef CONFIG_FAIRSCHED
+	vsched->node->vcpus = vsched->num_online_vcpus;
+#endif
+	return ret;
+}
+
+int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched)
+{
+	vcpu_t dest_vcpu;
+	int id;
+
+	id = first_cpu(vsched->vcpu_online_map);
+	if (id >= NR_CPUS)
+		goto err;
+
+	dest_vcpu = vsched_vcpu(vsched, id);
+	set_cpus_allowed(p, CPU_MASK_ALL);
+	sched_migrate_task(p, dest_vcpu);
+
+	if (task_vsched_id(p) != vsched_id(vsched)) {
+		/* race: probably someone changed cpus_allowed? */
+		printk("vsched_mvpr: failed to move task\n");
+		goto err;
+	}
+
+	return 0;
+
+err:
+	return -EINVAL;
+}
+
+void vsched_fairsched_link(struct vcpu_scheduler *vsched,
+		struct fairsched_node *node)
+{
+	vsched->node = node;
+	node->vsched = vsched;
+}
+
+void vsched_fairsched_unlink(struct vcpu_scheduler *vsched,
+		struct fairsched_node *node)
+{
+	vsched->node = NULL;
+	node->vsched = NULL;
+}
+
+int vsched_create(int id, struct fairsched_node *node)
+{
+	struct vcpu_scheduler *vsched;
+	int res, cpus;
+
+	vsched = kmalloc(sizeof(*vsched), GFP_KERNEL);
+	if (vsched == NULL)
+		return -ENOMEM;
+
+	vsched_init(vsched, node->id);
+	vsched_fairsched_link(vsched, node);
+
+	cpus = node->vcpus ? : num_online_cpus();
+	res = vsched_set_vcpus(vsched, cpus);
+	if (res < 0)
+		goto err_add;
+
+	return 0;
+
+err_add:
+	vsched_destroy(vsched);
+	return res;
+}
+
+int vsched_destroy(struct vcpu_scheduler *vsched)
+{
+	if (vsched == NULL)
+		return 0;
+
+	vsched_set_vcpus(vsched, 0);
+
+	spin_lock_irq(&fairsched_lock);
+	if (vsched->num_online_vcpus ||
+	    !list_empty(&vsched->running_list) ||
+	    !list_empty(&vsched->active_list) ||
+	    !list_empty(&vsched->idle_list))
+		goto err_busy;
+
+	vsched_fairsched_unlink(vsched, vsched->node);
+	spin_unlock_irq(&fairsched_lock);
+
+	kfree(vsched);
+	return 0;
+
+err_busy:
+	oops_in_progress = 1;
+	printk(KERN_ERR "BUG in vsched_destroy, id %d: n%d r%d a%d i%d\n",
+			vsched->id,
+			vsched->num_online_vcpus,
+			!list_empty(&vsched->running_list),
+			!list_empty(&vsched->active_list),
+			!list_empty(&vsched->idle_list));
+	spin_unlock_irq(&fairsched_lock);
+	oops_in_progress = 0;
+	return -EBUSY;
+	
+}
+#endif /* defined(CONFIG_FAIRSCHED) */
+#endif /* defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED) */
+
+static void init_boot_vcpu(void)
+{
+	int res;
+
+	/*
+	 * We set up boot_vcpu and its runqueue until init_idle() happens
+	 * on cpu0. This is required since timer interrupts can happen
+	 * between sched_init() and init_idle().
+	 */
+	init_vcpu(&boot_idle_vcpu, 0);
+	vcpu_rq(&boot_idle_vcpu)->curr = current;
+	res = install_vcpu(&boot_idle_vcpu, &idle_vsched);
+	if (res < 0)
+		panic("Can't install boot idle vcpu");
+
+	init_vcpu(&boot_vcpu, 0);
+	vcpu_rq(&boot_vcpu)->curr = current;
+	res = install_vcpu(&boot_vcpu, &default_vsched);
+	if (res < 0)
+		panic("Can't install boot vcpu");
+
+	this_pcpu()->vcpu = &boot_idle_vcpu;
+	this_pcpu()->vsched = &idle_vsched;
+}
+
+static void init_pcpu(int id)
+{
+	struct pcpu_info *pcpu;
+
+	pcpu = pcpu(id);
+	pcpu->id = id;
+#ifdef CONFIG_SMP
+	pcpu->sd = &sched_domain_dummy;
+#endif
+
+#ifndef CONFIG_SCHED_VCPU
+	init_vcpu(vcpu(id), id);
+#endif
+}
+
+static void init_pcpus(void)
+{
+	int i;
+	for (i = 0; i < NR_CPUS; i++)
+		init_pcpu(i);
+}
+
+#ifdef CONFIG_SCHED_VCPU
+static void show_vcpu_list(struct vcpu_scheduler *vsched, struct list_head *lh)
+{
+	cpumask_t m;
+	vcpu_t vcpu;
+	int i;
+
+	cpus_clear(m);
+	list_for_each_entry(vcpu, lh, list)
+		cpu_set(vcpu->id, m);
+
+	for (i = 0; i < NR_CPUS; i++)
+		if (cpu_isset(i, m))
+			printk("%d ", i);
+}
+
+#define PRINT(s, sz, fmt...)				\
+	do {						\
+		int __out;				\
+		__out = scnprintf(*s, *sz, fmt);	\
+		*s += __out;				\
+		*sz -= __out;				\
+	} while(0)
+
+static void show_rq_array(prio_array_t *array, char *header, char **s, int *sz)
+{
+	struct list_head *list;
+	task_t *p;
+	int k, h;
+
+	h = 0;
+	for (k = 0; k < MAX_PRIO; k++) {
+		list = array->queue + k;
+		if (list_empty(list))
+			continue;
+
+		if (!h) {
+			PRINT(s, sz, header);
+			h = 1;
+		}
+
+		PRINT(s, sz, " prio %d (", k);
+		list_for_each_entry(p, list, run_list)
+			PRINT(s, sz, "%s[%d] ", p->comm, p->pid);
+		PRINT(s, sz, ")");
+	}
+	if (h)
+		PRINT(s, sz, "\n");
+}
+
+static void show_vcpu(vcpu_t vcpu)
+{
+	runqueue_t *rq;
+	char buf[1024], *s;
+	unsigned long flags;
+	int sz;
+
+	if (vcpu == NULL)
+		return;
+
+	rq = vcpu_rq(vcpu);
+	spin_lock_irqsave(&rq->lock, flags);
+	printk("  vcpu %d: last_pcpu %d, state %s%s\n",
+			vcpu->id, vcpu->last_pcpu,
+			vcpu->active ? "A" : "",
+			vcpu->running ? "R" : "");
+
+	printk("    rq: running %lu, load %lu, sw %Lu, sd %p\n",
+			rq->nr_running,
+#ifdef CONFIG_SMP
+			rq->cpu_load,
+#else
+			0LU,
+#endif
+			rq->nr_switches,
+#ifdef CONFIG_SMP
+			rq->sd
+#else
+			NULL
+#endif
+	      );
+
+	s = buf;
+	sz = sizeof(buf) - 1;
+
+	show_rq_array(rq->active, "      active:", &s, &sz);
+	show_rq_array(rq->expired, "      expired:", &s, &sz);
+	spin_unlock_irqrestore(&rq->lock, flags);
+
+	*s = 0;
+	printk(buf);
+}
+
+static inline void fairsched_show_node(struct vcpu_scheduler *vsched)
+{
+#ifdef CONFIG_FAIRSCHED
+	struct fairsched_node *node;
+
+	node = vsched->node;
+	printk("fsnode: ready %d run %d cpu %d vsched %p, pcpu %d\n",
+			node->nr_ready, node->nr_runnable, node->nr_pcpu,
+			node->vsched, smp_processor_id());
+#endif
+}
+
+static void __show_vsched(struct vcpu_scheduler *vsched)
+{
+	char mask[NR_CPUS + 1];
+	int i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fairsched_lock, flags);
+	printk("vsched id=%d\n", vsched_id(vsched));
+	fairsched_show_node(vsched);
+
+	printk("  idle cpus ");
+	show_vcpu_list(vsched, &vsched->idle_list);
+	printk("; active cpus ");
+	show_vcpu_list(vsched, &vsched->active_list);
+	printk("; running cpus ");
+	show_vcpu_list(vsched, &vsched->running_list);
+	printk("\n");
+
+	cpumask_scnprintf(mask, NR_CPUS, vsched->vcpu_online_map);
+	printk("  num_online_cpus=%d, mask=%s (w=%d)\n",
+			vsched->num_online_vcpus, mask,
+			cpus_weight(vsched->vcpu_online_map));
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+
+	for (i = 0; i < NR_CPUS; i++)
+		show_vcpu(vsched->vcpu[i]);
+}
+
+void show_vsched(void)
+{
+	oops_in_progress = 1;
+	__show_vsched(&idle_vsched);
+	__show_vsched(&default_vsched);
+	oops_in_progress = 0;
+}
+#endif /* CONFIG_SCHED_VCPU */
+
+int __devinit vsched_init_default(int cpu)
+{
+	if (cpu > 0)
+		return __add_vcpu(&default_vsched, cpu);
+	return 0;	
+}
+
+void __devinit vsched_fini_default(int cpu)
+{
+	vcpu_t vcpu;
+	runqueue_t *rq;
+
+	if (cpu == 0)
+		return;
+
+	vcpu = vsched_vcpu(&default_vsched, cpu);
+	rq = vcpu_rq(vcpu);
+
+	offline_vcpu(&default_vsched, cpu, rq);
+	del_vcpu(&default_vsched, cpu, vcpu);
+}
 
+void __init sched_init(void)
+{
+	init_sd();
+	init_pcpus();
+#if defined(CONFIG_SCHED_VCPU)
+	vsched_init(&idle_vsched, -1);
+	vsched_init(&default_vsched, 0);
+#if defined(CONFIG_FAIRSCHED)
+	fairsched_init_early();
+	vsched_fairsched_link(&idle_vsched, &fairsched_idle_node);
+	vsched_fairsched_link(&default_vsched, &fairsched_init_node);
+#endif
+	init_boot_vcpu();
+#else
+#if defined(CONFIG_FAIRSCHED)
+	fairsched_init_early();
+#endif
+#endif
 	/*
 	 * The boot idle thread does lazy MMU switching as well:
 	 */
@@ -5192,3 +7128,7 @@ void __might_sleep(char *file, int line,
 }
 EXPORT_SYMBOL(__might_sleep);
 #endif
+
+EXPORT_SYMBOL(ve_sched_get_idle_time);
+EXPORT_SYMBOL(nr_running_ve);
+EXPORT_SYMBOL(nr_uninterruptible_ve);
diff -Nurap linux-2.6.9-100.orig/kernel/signal.c linux-2.6.9-ve023stab054/kernel/signal.c
--- linux-2.6.9-100.orig/kernel/signal.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/signal.c	2011-06-15 19:26:22.000000000 +0400
@@ -12,6 +12,7 @@
 
 #include <linux/config.h>
 #include <linux/slab.h>
+#include <linux/kmem_cache.h>
 #include <linux/module.h>
 #include <linux/smp_lock.h>
 #include <linux/init.h>
@@ -28,13 +29,17 @@
 #include <asm/unistd.h>
 #include <asm/siginfo.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_misc.h>
+
 extern void k_getrusage(struct task_struct *, int, struct rusage *);
 
 /*
  * SLAB caches for signal bits.
  */
 
-static kmem_cache_t *sigqueue_cachep;
+kmem_cache_t *sigqueue_cachep;
+EXPORT_SYMBOL(sigqueue_cachep);
 
 /*
  * In POSIX a signal is sent either to a specific thread (Linux task)
@@ -159,6 +164,26 @@ static kmem_cache_t *sigqueue_cachep;
 	 sigtestsetmask(&current->signal->shared_pending.signal, \
 						  M(SIGCONT) | M(SIGKILL)))
 
+static int sig_ve_ignored(int sig, struct siginfo *info, struct task_struct *t)
+{
+	struct ve_struct *ve;
+
+	if (ve_allow_init_signals)
+		return 0;
+
+	/* always allow signals from the kernel */
+	if (info == SEND_SIG_FORCED ||
+		       (!is_si_special(info) && SI_FROMKERNEL(info)))
+		return 0;
+
+	ve = current->ve_task_info.owner_env;
+	if (ve->init_entry != t)
+		return 0;
+	if (ve_is_super(get_exec_env()))
+		return 0;
+	return !sig_user_defined(t, sig) || sig_kernel_only(sig);
+}
+
 static int sig_ignored(struct task_struct *t, int sig)
 {
 	void __user * handler;
@@ -218,6 +243,7 @@ static inline int has_pending_signals(si
 static int recalc_sigpending_tsk(struct task_struct *t)
 {
 	if (t->signal->group_stop_count > 0 ||
+	    test_tsk_thread_flag(t,TIF_FREEZE) ||
 	    PENDING(&t->pending, &t->blocked) ||
 	    PENDING(&t->signal->shared_pending, &t->blocked)) {
 		set_tsk_thread_flag(t, TIF_SIGPENDING);
@@ -230,6 +256,7 @@ static int recalc_sigpending_tsk(struct 
 	 */
 	return 0;
 }
+EXPORT_SYMBOL(recalc_sigpending_tsk);
 
 /*
  * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up.
@@ -304,8 +331,16 @@ static struct sigqueue *__sigqueue_alloc
 	atomic_inc(&user->sigpending);
 	if (override_rlimit ||
 	    atomic_read(&user->sigpending) <
-			t->rlim[RLIMIT_SIGPENDING].rlim_cur)
+			t->rlim[RLIMIT_SIGPENDING].rlim_cur) {
 		q = kmem_cache_alloc(sigqueue_cachep, flags);
+		if (q != NULL) {
+			if (ub_siginfo_charge(get_task_ub(t),
+					kmem_cache_memusage(sigqueue_cachep))) {
+				kfree(q);
+				q = NULL;
+			}
+		}
+	}
 	if (unlikely(q == NULL)) {
 		atomic_dec(&user->sigpending);
 	} else {
@@ -313,6 +348,7 @@ static struct sigqueue *__sigqueue_alloc
 		q->flags = 0;
 		q->lock = NULL;
 		q->user = get_uid(user);
+		sig_ub(q) = get_beancounter(get_task_ub(t));
 	}
 	return(q);
 }
@@ -321,6 +357,8 @@ static inline void __sigqueue_free(struc
 {
 	if (q->flags & SIGQUEUE_PREALLOC)
 		return;
+	ub_siginfo_uncharge(sig_ub(q), kmem_cache_memusage(sigqueue_cachep));
+	put_beancounter(sig_ub(q));
 	atomic_dec(&q->user->sigpending);
 	free_uid(q->user);
 	kmem_cache_free(sigqueue_cachep, q);
@@ -388,8 +426,11 @@ void __exit_signal(struct task_struct *t
 		BUG();
 	spin_lock(&sighand->siglock);
 	if (atomic_dec_and_test(&sig->count)) {
-		if (tsk == sig->curr_target)
+		if (tsk == sig->curr_target) {
 			sig->curr_target = next_thread(tsk);
+			if (tsk == sig->curr_target)
+				sig->curr_target = NULL;
+		}
 		tsk->signal = NULL;
 		spin_unlock(&sighand->siglock);
 		flush_sigqueue(&sig->shared_pending);
@@ -402,8 +443,11 @@ void __exit_signal(struct task_struct *t
 			wake_up_process(sig->group_exit_task);
 			sig->group_exit_task = NULL;
 		}
-		if (tsk == sig->curr_target)
+		if (tsk == sig->curr_target) {
 			sig->curr_target = next_thread(tsk);
+			if (tsk == sig->curr_target)
+				sig->curr_target = NULL;
+		}
 		tsk->signal = NULL;
 		/*
 		 * Accumulate here the counters for all threads but the
@@ -628,6 +672,7 @@ void signal_wake_up(struct task_struct *
 	if (!wake_up_state(t, mask))
 		kick_process(t);
 }
+EXPORT_SYMBOL(signal_wake_up);
 
 /*
  * Remove signals in mask from the pending set and queue.
@@ -818,7 +863,7 @@ static int send_signal(int sig, struct s
 			q->info.si_signo = sig;
 			q->info.si_errno = 0;
 			q->info.si_code = SI_USER;
-			q->info.si_pid = current->pid;
+			q->info.si_pid = virt_pid(current);
 			q->info.si_uid = current->uid;
 			break;
 		case 1:
@@ -977,7 +1022,6 @@ __group_complete_signal(int sig, struct 
 		if (t == NULL)
 			/* restart balancing at this thread */
 			t = p->signal->curr_target = p;
-		BUG_ON(t->tgid != p->tgid);
 
 		while (!wants_signal(sig, t)) {
 			t = next_thread(t);
@@ -1138,7 +1182,8 @@ int group_send_sig_info(int sig, struct 
 	ret = check_kill_permission(sig, info, p);
 	if (!ret && sig && p->sighand) {
 		spin_lock_irqsave(&p->sighand->siglock, flags);
-		ret = __group_send_sig_info(sig, info, p);
+		ret = sig_ve_ignored(sig, info, p) ? 0 :
+			 __group_send_sig_info(sig, info, p);
 		spin_unlock_irqrestore(&p->sighand->siglock, flags);
 	}
 
@@ -1158,13 +1203,18 @@ int __kill_pg_info(int sig, struct sigin
 	if (pgrp <= 0)
 		return -EINVAL;
 
+	/* Use __vpid_to_pid(). This function is used under write_lock
+	 * tasklist_lock. */
+	if (is_virtual_pid(pgrp))
+		pgrp = __vpid_to_pid(pgrp);
+
 	success = 0;
 	retval = -ESRCH;
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		int err = group_send_sig_info(sig, info, p);
 		success |= !err;
 		retval = err;
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
 	return success ? 0 : retval;
 }
 
@@ -1187,7 +1237,7 @@ kill_proc_info(int sig, struct siginfo *
 	struct task_struct *p;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	error = -ESRCH;
 	if (p)
 		error = group_send_sig_info(sig, info, p);
@@ -1206,7 +1256,7 @@ int kill_proc_info_as_uid(int sig, struc
 		return ret;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	if (!p) {
 		ret = -ESRCH;
 		goto out_unlock;
@@ -1276,8 +1326,8 @@ static int kill_something_info(int sig, 
 		struct task_struct * p;
 
 		read_lock(&tasklist_lock);
-		for_each_process(p) {
-			if (p->pid > 1 && p->tgid != current->tgid) {
+		for_each_process_ve(p) {
+			if (virt_pid(p) > 1 && p->tgid != current->tgid) {
 				int err = group_send_sig_info(sig, info, p);
 				++count;
 				if (err != -EPERM)
@@ -1558,9 +1608,16 @@ void do_notify_parent(struct task_struct
 	BUG_ON(!tsk->ptrace &&
 	       (tsk->group_leader != tsk || !thread_group_empty(tsk)));
 
+#ifdef CONFIG_VE
+	/* Allow to send only SIGCHLD from VE */
+	if (sig != SIGCHLD &&
+	   VE_TASK_INFO(tsk)->owner_env != VE_TASK_INFO(tsk->parent)->owner_env)
+		sig = SIGCHLD;
+#endif
+
 	info.si_signo = sig;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	info.si_pid = get_task_pid_ve(tsk, VE_TASK_INFO(tsk->parent)->owner_env);
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1617,7 +1674,7 @@ do_notify_parent_cldstop(struct task_str
 
 	info.si_signo = SIGCHLD;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	info.si_pid = get_task_pid_ve(tsk, VE_TASK_INFO(parent)->owner_env);
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1675,6 +1732,7 @@ static void ptrace_stop(int exit_code, i
 	current->exit_code = exit_code;
 
 	/* Let the debugger run.  */
+	set_pn_state(current, PN_STOP_SIGNAL);
 	set_current_state(TASK_TRACED);
 	spin_unlock_irq(&current->sighand->siglock);
 	read_lock(&tasklist_lock);
@@ -1695,6 +1753,7 @@ static void ptrace_stop(int exit_code, i
 		set_current_state(TASK_RUNNING);
 		current->exit_code = nostop_code;
 	}
+	clear_pn_state(current);
 
 	/*
 	 * We are back.  Now reacquire the siglock before touching
@@ -1754,7 +1813,9 @@ finish_stop(int stop_count)
 		read_unlock(&tasklist_lock);
 	}
 
+	set_stop_state(current);
 	schedule();
+	clear_stop_state(current);
 	/*
 	 * Now we don't run again until continued.
 	 */
@@ -1956,7 +2017,7 @@ relock:
 				info->si_signo = signr;
 				info->si_errno = 0;
 				info->si_code = SI_USER;
-				info->si_pid = current->parent->pid;
+				info->si_pid = virt_pid(current->parent);
 				info->si_uid = current->parent->uid;
 			}
 
@@ -1987,7 +2048,7 @@ relock:
 			continue;
 
 		/* Init gets no signals it doesn't want.  */
-		if (current->pid == 1)
+		if (current == child_reaper)
 			continue;
 
 		if (sig_kernel_stop(signr)) {
@@ -2344,7 +2405,7 @@ sys_kill(int pid, int sig)
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_USER;
-	info.si_pid = current->tgid;
+	info.si_pid = virt_tgid(current);
 	info.si_uid = current->uid;
 
 	return kill_something_info(sig, &info, pid);
@@ -2373,13 +2434,13 @@ asmlinkage long sys_tgkill(int tgid, int
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_TKILL;
-	info.si_pid = current->tgid;
+	info.si_pid = virt_tgid(current);
 	info.si_uid = current->uid;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	error = -ESRCH;
-	if (p && (p->tgid == tgid)) {
+	if (p && (virt_tgid(p) == tgid)) {
 		error = check_kill_permission(sig, &info, p);
 		/*
 		 * The null signal is a permissions and process existence
@@ -2387,8 +2448,10 @@ asmlinkage long sys_tgkill(int tgid, int
 		 */
 		if (!error && sig && p->sighand) {
 			spin_lock_irq(&p->sighand->siglock);
-			handle_stop_signal(sig, p);
-			error = specific_send_sig_info(sig, &info, p);
+			if (!sig_ve_ignored(sig, &info, p)) {
+				handle_stop_signal(sig, p);
+				error = specific_send_sig_info(sig, &info, p);
+			}
 			spin_unlock_irq(&p->sighand->siglock);
 		}
 	}
@@ -2413,11 +2476,11 @@ sys_tkill(int pid, int sig)
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_TKILL;
-	info.si_pid = current->tgid;
+	info.si_pid = virt_tgid(current);
 	info.si_uid = current->uid;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	error = -ESRCH;
 	if (p) {
 		error = check_kill_permission(sig, &info, p);
@@ -2749,5 +2812,5 @@ void __init signals_init(void)
 		kmem_cache_create("sigqueue",
 				  sizeof(struct sigqueue),
 				  __alignof__(struct sigqueue),
-				  SLAB_PANIC, NULL, NULL);
+				  SLAB_PANIC|SLAB_UBC, NULL, NULL);
 }
diff -Nurap linux-2.6.9-100.orig/kernel/softirq.c linux-2.6.9-ve023stab054/kernel/softirq.c
--- linux-2.6.9-100.orig/kernel/softirq.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/softirq.c	2011-06-15 19:26:19.000000000 +0400
@@ -16,8 +16,10 @@
 #include <linux/cpu.h>
 #include <linux/kthread.h>
 #include <linux/rcupdate.h>
+#include <linux/sysctl.h>
 
 #include <asm/irq.h>
+#include <ub/beancounter.h>
 /*
    - No shared variables, all the data are CPU local.
    - If a softirq needs serialization, let it serialize itself
@@ -44,6 +46,8 @@ EXPORT_SYMBOL(irq_stat);
 static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
 
 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+static DEFINE_PER_CPU(struct task_struct *, ksoftirqd_wakeup);
+static int ksoftirqd_stat[NR_CPUS];
 
 /*
  * we cannot loop indefinitely here to avoid userspace starvation,
@@ -54,7 +58,7 @@ static DEFINE_PER_CPU(struct task_struct
 static inline void wakeup_softirqd(void)
 {
 	/* Interrupts are disabled: no need to stop preemption */
-	struct task_struct *tsk = __get_cpu_var(ksoftirqd);
+	struct task_struct *tsk = __get_cpu_var(ksoftirqd_wakeup);
 
 	if (tsk && tsk->state != TASK_RUNNING)
 		wake_up_process(tsk);
@@ -77,10 +81,13 @@ asmlinkage void __do_softirq(void)
 	__u32 pending;
 	int max_restart = MAX_SOFTIRQ_RESTART;
 	int cpu;
+	struct user_beancounter *old_exec_ub;
+	struct ve_struct *envid;
 
 	pending = local_softirq_pending();
 
 	local_bh_disable();
+	envid = set_exec_env(get_ve0());
 	cpu = smp_processor_id();
 restart:
 	/* Reset the pending bitmask before enabling irqs */
@@ -90,6 +97,8 @@ restart:
 
 	h = softirq_vec;
 
+	old_exec_ub = set_exec_ub(get_ub0());
+
 	do {
 		if (pending & 1) {
 			h->action(h);
@@ -99,6 +108,8 @@ restart:
 		pending >>= 1;
 	} while (pending);
 
+	(void)set_exec_ub(old_exec_ub);
+
 	local_irq_disable();
 
 	pending = local_softirq_pending();
@@ -108,6 +119,7 @@ restart:
 	if (pending)
 		wakeup_softirqd();
 
+	(void)set_exec_env(envid);
 	__local_bh_enable();
 }
 
@@ -492,6 +504,52 @@ static int __devinit cpu_callback(struct
 	return NOTIFY_OK;
 }
 
+static int proc_ksoftirqd(ctl_table *ctl, int write, struct file *filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret, cpu;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+	if (!write)
+		return ret;
+
+	for_each_online_cpu(cpu) {
+		per_cpu(ksoftirqd_wakeup, cpu) =
+			ksoftirqd_stat[cpu] ? per_cpu(ksoftirqd, cpu) : NULL;
+	}
+	return ret;
+}
+
+static int sysctl_ksoftirqd(ctl_table *table, int *name, int nlen,
+		void *oldval, size_t *oldlenp, void *newval, size_t newlen,
+		void **context)
+{
+	return -EINVAL;
+}
+
+static ctl_table debug_table[] = {
+	{
+		.ctl_name	= 1246,
+		.procname	= "ksoftirqd",
+		.data		= ksoftirqd_stat,
+		.maxlen		= sizeof(ksoftirqd_stat),
+		.mode		= 0644,
+		.proc_handler	= &proc_ksoftirqd,
+		.strategy	= &sysctl_ksoftirqd
+	},
+	{0}
+};
+
+static ctl_table root_table[] = {
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table
+	},
+	{0}
+};
+
 static struct notifier_block __devinitdata cpu_nfb = {
 	.notifier_call = cpu_callback
 };
@@ -502,5 +560,6 @@ __init int spawn_ksoftirqd(void)
 	cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
 	register_cpu_notifier(&cpu_nfb);
+	register_sysctl_table(root_table, 0);
 	return 0;
 }
diff -Nurap linux-2.6.9-100.orig/kernel/stop_machine.c linux-2.6.9-ve023stab054/kernel/stop_machine.c
--- linux-2.6.9-100.orig/kernel/stop_machine.c	2004-10-19 01:53:10.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/stop_machine.c	2011-06-15 19:26:22.000000000 +0400
@@ -4,13 +4,39 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/syscalls.h>
+#include <linux/delay.h>
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
+#include <asm/uaccess.h>
 
 /* Since we effect priority and affinity (both of which are visible
  * to, and settable by outside processes) we do indirection via a
  * kthread. */
 
+struct stop_machine_task {
+	struct list_head list;
+	struct task_struct *task;
+	unsigned int state;
+	unsigned int data[2];
+};
+
+static struct list_head sm_queue = LIST_HEAD_INIT(sm_queue);
+static spinlock_t sm_lock = SPIN_LOCK_UNLOCKED;
+
+static inline void sm_add(struct stop_machine_task *smt)
+{
+	spin_lock(&sm_lock);
+	list_add(&smt->list, &sm_queue);
+	spin_unlock(&sm_lock);
+}
+
+static inline void sm_del(struct stop_machine_task *smt)
+{
+	spin_lock(&sm_lock);
+	list_del(&smt->list);
+	spin_unlock(&sm_lock);
+}
+
 /* Thread to stop each CPU in user context. */
 enum stopmachine_state {
 	STOPMACHINE_WAIT,
@@ -19,15 +45,65 @@ enum stopmachine_state {
 	STOPMACHINE_EXIT,
 };
 
+enum {
+	SM_SLAVE_INIT,
+	SM_SLAVE_EXIT,
+
+	SM_STOPPER_FORKING,
+	SM_STOPPER_WAITING,
+	SM_STOPPER_READY,
+	SM_STOPPER_INIT,
+	SM_STOPPER_WORK,
+	SM_STOPPER_RESTART,
+	SM_STOPPER_OK,
+	SM_STOPPER_ERR,
+
+	SM_CALLER_INIT,
+	SM_CALLER_LOCKED,
+	SM_CALLER_WAIT,
+};
+
 static enum stopmachine_state stopmachine_state;
 static unsigned int stopmachine_num_threads;
 static atomic_t stopmachine_thread_ack;
 static DECLARE_MUTEX(stopmachine_mutex);
 
+void stop_machine_show_state(void)
+{
+	struct stop_machine_task *smt;
+	struct task_struct *tsk;
+
+	printk("  state: %d\n", stopmachine_state);
+	printk("  num threads: %d\n", stopmachine_num_threads);
+
+	spin_lock(&sm_lock);
+	list_for_each_entry (smt, &sm_queue, list) {
+		tsk = smt->task;
+		printk("   task %d (%p) state %ld: pcpu %d vcpu %d vsched %d"
+				" sm_state %d (data %d %d)\n",
+				tsk->pid,
+				tsk,
+				tsk->state,
+				task_pcpu(tsk),
+				task_cpu(tsk),
+				task_vsched_id(tsk),
+				smt->state,
+				smt->data[0],
+				smt->data[1]);
+	}
+	spin_unlock(&sm_lock);
+}
+
 static int stopmachine(void *cpu)
 {
 	int irqs_disabled = 0;
 	int prepared = 0;
+	struct stop_machine_task smt;
+
+	smt.state = SM_SLAVE_INIT;
+	smt.task = current;
+	smt.data[0] = smt.data[1] = 0;
+	sm_add(&smt);
 
 	set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
 
@@ -52,9 +128,16 @@ static int stopmachine(void *cpu)
 			mb(); /* Must read state first. */
 			atomic_inc(&stopmachine_thread_ack);
 		}
-		cpu_relax();
+		/* Yield in first stage: migration threads need to
+		 * help our sisters onto their CPUs. */
+		if (!prepared && !irqs_disabled)
+			msleep(10);
+		else
+			cpu_relax();
 	}
 
+	smt.state = SM_SLAVE_EXIT;
+
 	/* Ack: we are exiting. */
 	mb(); /* Must read state first. */
 	atomic_inc(&stopmachine_thread_ack);
@@ -64,6 +147,7 @@ static int stopmachine(void *cpu)
 	if (prepared)
 		preempt_enable();
 
+	sm_del(&smt);
 	return 0;
 }
 
@@ -77,20 +161,28 @@ static void stopmachine_set_state(enum s
 		cpu_relax();
 }
 
-static int stop_machine(void)
+static int stop_machine(struct stop_machine_task *smt)
 {
 	int i, ret = 0;
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+	mm_segment_t old_fs = get_fs();
 
 	/* One high-prio thread per cpu.  We'll do this one. */
-	sys_sched_setscheduler(current->pid, SCHED_FIFO, &param);
+	set_fs(KERNEL_DS);
+	sys_sched_setscheduler(current->pid, SCHED_FIFO,
+				(struct sched_param __user *)&param);
+	set_fs(old_fs);
 
 	atomic_set(&stopmachine_thread_ack, 0);
 	stopmachine_num_threads = 0;
 	stopmachine_state = STOPMACHINE_WAIT;
 
+	smt->data[0] = task_cpu(current);
+	smt->data[1] = task_pcpu(current);
+	smt->state = SM_STOPPER_FORKING;
+
 	for_each_online_cpu(i) {
-		if (i == smp_processor_id())
+		if (i == task_cpu(current))
 			continue;
 		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
 		if (ret < 0)
@@ -99,23 +191,24 @@ static int stop_machine(void)
 	}
 
 	/* Wait for them all to come to life. */
+	smt->state = SM_STOPPER_WAITING;
 	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
-		yield();
+		msleep(10);
+
+	smt->state = SM_STOPPER_READY;
 
 	/* If some failed, kill them all. */
 	if (ret < 0) {
 		stopmachine_set_state(STOPMACHINE_EXIT);
-		up(&stopmachine_mutex);
 		return ret;
 	}
 
-	/* Don't schedule us away at this point, please. */
-	local_irq_disable();
-
 	/* Now they are all started, make them hold the CPUs, ready. */
+	preempt_disable();
 	stopmachine_set_state(STOPMACHINE_PREPARE);
 
 	/* Make them disable irqs. */
+	local_irq_disable();
 	stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
 
 	return 0;
@@ -125,6 +218,7 @@ static void restart_machine(void)
 {
 	stopmachine_set_state(STOPMACHINE_EXIT);
 	local_irq_enable();
+	preempt_enable_no_resched();
 }
 
 struct stop_machine_data
@@ -138,12 +232,22 @@ static int do_stop(void *_smdata)
 {
 	struct stop_machine_data *smdata = _smdata;
 	int ret;
+	struct stop_machine_task smt;
+
+	smt.state = SM_STOPPER_INIT;
+	smt.task = current;
+	smt.data[0] = smt.data[1] = 0;
+	sm_add(&smt);
 
-	ret = stop_machine();
+	ret = stop_machine(&smt);
 	if (ret == 0) {
+		smt.state = SM_STOPPER_WORK;
 		ret = smdata->fn(smdata->data);
+		smt.state = SM_STOPPER_RESTART;
 		restart_machine();
-	}
+		smt.state = SM_STOPPER_OK;
+	} else
+		smt.state = SM_STOPPER_ERR;
 
 	/* We're done: you can kthread_stop us now */
 	complete(&smdata->done);
@@ -151,10 +255,13 @@ static int do_stop(void *_smdata)
 	/* Wait for kthread_stop */
 	set_current_state(TASK_INTERRUPTIBLE);
 	while (!kthread_should_stop()) {
+		smt.data[0]++;
 		schedule();
 		set_current_state(TASK_INTERRUPTIBLE);
 	}
 	__set_current_state(TASK_RUNNING);
+
+	sm_del(&smt);
 	return ret;
 }
 
@@ -163,6 +270,12 @@ struct task_struct *__stop_machine_run(i
 {
 	struct stop_machine_data smdata;
 	struct task_struct *p;
+	struct stop_machine_task smt;
+
+	smt.state = SM_CALLER_INIT;
+	smt.task = current;
+	smt.data[0] = smt.data[1] = 0;
+	sm_add(&smt);
 
 	smdata.fn = fn;
 	smdata.data = data;
@@ -170,6 +283,8 @@ struct task_struct *__stop_machine_run(i
 
 	down(&stopmachine_mutex);
 
+	smt.state = SM_CALLER_LOCKED;
+
 	/* If they don't care which CPU fn runs on, bind to any online one. */
 	if (cpu == NR_CPUS)
 		cpu = smp_processor_id();
@@ -178,9 +293,13 @@ struct task_struct *__stop_machine_run(i
 	if (!IS_ERR(p)) {
 		kthread_bind(p, cpu);
 		wake_up_process(p);
+
+		smt.state = SM_CALLER_WAIT;
 		wait_for_completion(&smdata.done);
 	}
 	up(&stopmachine_mutex);
+
+	sm_del(&smt);
 	return p;
 }
 
diff -Nurap linux-2.6.9-100.orig/kernel/sys.c linux-2.6.9-ve023stab054/kernel/sys.c
--- linux-2.6.9-100.orig/kernel/sys.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/sys.c	2011-06-15 19:26:22.000000000 +0400
@@ -12,6 +12,7 @@
 #include <linux/mman.h>
 #include <linux/smp_lock.h>
 #include <linux/notifier.h>
+#include <linux/virtinfo.h>
 #include <linux/reboot.h>
 #include <linux/prctl.h>
 #include <linux/init.h>
@@ -223,6 +224,102 @@ int unregister_reboot_notifier(struct no
 
 EXPORT_SYMBOL(unregister_reboot_notifier);
 
+DECLARE_MUTEX(virtinfo_sem);
+EXPORT_SYMBOL(virtinfo_sem);
+static struct vnotifier_block *virtinfo_chain[VIRT_TYPES];
+
+void __virtinfo_notifier_register(int type, struct vnotifier_block *nb)
+{
+	struct vnotifier_block **p;
+
+	for (p = &virtinfo_chain[type];
+	     *p != NULL && nb->priority < (*p)->priority;
+	     p = &(*p)->next);
+	nb->next = *p;
+	smp_wmb();
+	*p = nb;
+}
+
+EXPORT_SYMBOL(__virtinfo_notifier_register);
+
+void virtinfo_notifier_register(int type, struct vnotifier_block *nb)
+{
+	down(&virtinfo_sem);
+	__virtinfo_notifier_register(type, nb);
+	up(&virtinfo_sem);
+}
+
+EXPORT_SYMBOL(virtinfo_notifier_register);
+
+struct virtinfo_cnt_struct {
+	volatile unsigned long exit[NR_CPUS];
+	volatile unsigned long entry;
+};
+static DEFINE_PER_CPU(struct virtinfo_cnt_struct, virtcnt);
+
+void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb)
+{
+	struct vnotifier_block **p;
+	int entry_cpu, exit_cpu;
+	unsigned long cnt, ent;
+
+	down(&virtinfo_sem);
+	for (p = &virtinfo_chain[type]; *p != nb; p = &(*p)->next);
+	*p = nb->next;
+	smp_mb();
+
+	for_each_cpu_mask(entry_cpu, cpu_possible_map) {
+		while (1) {
+			cnt = 0;
+			for_each_cpu_mask(exit_cpu, cpu_possible_map)
+				cnt +=
+				    per_cpu(virtcnt, entry_cpu).exit[exit_cpu];
+			smp_rmb();
+			ent = per_cpu(virtcnt, entry_cpu).entry;
+			if (cnt == ent)
+				break;
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(HZ / 100);
+		}
+	}
+	up(&virtinfo_sem);
+}
+
+EXPORT_SYMBOL(virtinfo_notifier_unregister);
+
+int virtinfo_notifier_call(int type, unsigned long n, void *data)
+{
+	int ret;
+	int entry_cpu, exit_cpu;
+	struct vnotifier_block *nb;
+
+	entry_cpu = get_cpu();
+	per_cpu(virtcnt, entry_cpu).entry++;
+	smp_wmb();
+	put_cpu();
+
+	nb = virtinfo_chain[type];
+	ret = NOTIFY_DONE;
+	while (nb)
+	{
+		ret = nb->notifier_call(nb, n, data, ret);
+		if(ret & NOTIFY_STOP_MASK) {
+			ret &= ~NOTIFY_STOP_MASK;
+			break;
+		}
+		nb = nb->next;
+	}
+
+	exit_cpu = get_cpu();
+	smp_wmb();
+	per_cpu(virtcnt, entry_cpu).exit[exit_cpu]++;
+	put_cpu();
+
+	return ret;
+}
+
+EXPORT_SYMBOL(virtinfo_notifier_call);
+
 asmlinkage long sys_ni_syscall(void)
 {
 	return -ENOSYS;
@@ -290,7 +387,9 @@ cond_syscall(compat_set_mempolicy)
 cond_syscall(sys_add_key);
 cond_syscall(sys_request_key);
 cond_syscall(sys_keyctl);
+#ifdef CONFIG_KEYS_COMPAT
 cond_syscall(compat_sys_keyctl);
+#endif
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read)
@@ -342,17 +441,19 @@ asmlinkage long sys_setpriority(int whic
 	switch (which) {
 		case PRIO_PROCESS:
 			if (!who)
-				who = current->pid;
-			p = find_task_by_pid(who);
+				who = virt_pid(current);
+			p = find_task_by_pid_ve(who);
 			if (p)
 				error = set_one_prio(p, niceval, error);
 			break;
 		case PRIO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			else
+				who = vpid_to_pid(who);
+			do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
 				error = set_one_prio(p, niceval, error);
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_ve(who, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
 			user = current->user;
@@ -362,10 +463,10 @@ asmlinkage long sys_setpriority(int whic
 				if ((who != current->uid) && !(user = find_user(who)))
 					goto out_unlock;	/* No processes for this user */
 
-			do_each_thread(g, p)
+			do_each_thread_ve(g, p) {
 				if (p->uid == who)
 					error = set_one_prio(p, niceval, error);
-			while_each_thread(g, p);
+			} while_each_thread_ve(g, p);
 			if (who != current->uid)
 				free_uid(user);		/* For find_user() */
 			break;
@@ -395,8 +496,8 @@ asmlinkage long sys_getpriority(int whic
 	switch (which) {
 		case PRIO_PROCESS:
 			if (!who)
-				who = current->pid;
-			p = find_task_by_pid(who);
+				who = virt_pid(current);
+			p = find_task_by_pid_ve(who);
 			if (p) {
 				niceval = 20 - task_nice(p);
 				if (niceval > retval)
@@ -406,11 +507,13 @@ asmlinkage long sys_getpriority(int whic
 		case PRIO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			else
+				who = vpid_to_pid(who);
+			do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
 				niceval = 20 - task_nice(p);
 				if (niceval > retval)
 					retval = niceval;
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_ve(who, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
 			user = current->user;
@@ -420,13 +523,13 @@ asmlinkage long sys_getpriority(int whic
 				if ((who != current->uid) && !(user = find_user(who)))
 					goto out_unlock;	/* No processes for this user */
 
-			do_each_thread(g, p)
+			do_each_thread_ve(g, p) {
 				if (p->uid == who) {
 					niceval = 20 - task_nice(p);
 					if (niceval > retval)
 						retval = niceval;
 				}
-			while_each_thread(g, p);
+			} while_each_thread_ve(g, p);
 			if (who != current->uid)
 				free_uid(user);		/* for find_user() */
 			break;
@@ -462,6 +565,26 @@ asmlinkage long sys_reboot(int magic1, i
 	                magic2 != LINUX_REBOOT_MAGIC2C))
 		return -EINVAL;
 
+#ifdef CONFIG_VE
+	if (!ve_is_super(get_exec_env()))
+		switch (cmd) {
+		case LINUX_REBOOT_CMD_RESTART:
+		case LINUX_REBOOT_CMD_RESTART2:
+			set_bit(VE_REBOOT, &get_exec_env()->flags);
+
+		case LINUX_REBOOT_CMD_HALT:
+		case LINUX_REBOOT_CMD_POWER_OFF:
+			force_sig(SIGKILL, get_exec_env()->init_entry);
+
+		case LINUX_REBOOT_CMD_CAD_ON:
+		case LINUX_REBOOT_CMD_CAD_OFF:
+			return 0;
+
+		default:
+			return -EINVAL;
+		}
+#endif
+
 	lock_kernel();
 	switch (cmd) {
 	case LINUX_REBOOT_CMD_RESTART:
@@ -654,7 +777,7 @@ asmlinkage long sys_setgid(gid_t gid)
 	return 0;
 }
   
-static int set_user(uid_t new_ruid, int dumpclear)
+int set_user(uid_t new_ruid, int dumpclear)
 {
 	struct user_struct *new_user;
 
@@ -664,7 +787,7 @@ static int set_user(uid_t new_ruid, int 
 
 	if (atomic_read(&new_user->processes) >=
 				current->rlim[RLIMIT_NPROC].rlim_cur &&
-			new_user != &root_user) {
+			new_ruid != 0) {
 		free_uid(new_user);
 		return -EAGAIN;
 	}
@@ -679,6 +802,7 @@ static int set_user(uid_t new_ruid, int 
 	current->uid = new_ruid;
 	return 0;
 }
+EXPORT_SYMBOL(set_user);
 
 /*
  * Unprivileged users may change the real uid to the effective uid
@@ -961,8 +1085,26 @@ asmlinkage long sys_setfsgid(gid_t gid)
 	return old_fsgid;
 }
 
+#ifdef CONFIG_VE
+unsigned long long ve_relative_clock(struct timespec * ts)
+{
+	unsigned long long offset = 0;
+
+	if (ts->tv_sec > get_exec_env()->start_timespec.tv_sec ||
+	    (ts->tv_sec == get_exec_env()->start_timespec.tv_sec &&
+	     ts->tv_nsec >= get_exec_env()->start_timespec.tv_nsec))
+		offset = (unsigned long long)(ts->tv_sec -
+			get_exec_env()->start_timespec.tv_sec) * NSEC_PER_SEC
+			+ ts->tv_nsec -	get_exec_env()->start_timespec.tv_nsec;
+	return nsec_to_clock_t(offset);
+}
+#endif
+
 asmlinkage long sys_times(struct tms __user * tbuf)
 {
+#ifdef CONFIG_VE
+	struct timespec now;
+#endif
 	/*
 	 *	In the SMP world we might just be unlucky and have one of
 	 *	the times increment as we use it. Since the value is an
@@ -1007,7 +1149,12 @@ asmlinkage long sys_times(struct tms __u
 		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 			return -EFAULT;
 	}
+#ifndef CONFIG_VE
 	return (long) jiffies_64_to_clock_t(get_jiffies_64());
+#else
+	do_posix_clock_monotonic_gettime(&now);
+	return ve_relative_clock(&now);
+#endif
 }
 
 /*
@@ -1027,21 +1174,24 @@ asmlinkage long sys_setpgid(pid_t pid, p
 {
 	struct task_struct *p;
 	int err = -EINVAL;
+	pid_t _pgid;
 
 	if (!pid)
-		pid = current->pid;
+		pid = virt_pid(current);
 	if (!pgid)
 		pgid = pid;
 	if (pgid < 0)
 		return -EINVAL;
 
+	_pgid = vpid_to_pid(pgid);
+
 	/* From this point forward we keep holding onto the tasklist lock
 	 * so that our parent does not change from under us. -DaveM
 	 */
 	write_lock_irq(&tasklist_lock);
 
 	err = -ESRCH;
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	if (!p)
 		goto out;
 
@@ -1066,25 +1216,35 @@ asmlinkage long sys_setpgid(pid_t pid, p
 	if (p->signal->leader)
 		goto out;
 
-	if (pgid != pid) {
+	pgid = virt_pid(p);
+	if (_pgid != p->pid) {
 		struct task_struct *p;
 
-		do_each_task_pid(pgid, PIDTYPE_PGID, p) {
-			if (p->signal->session == current->signal->session)
+		do_each_task_pid_ve(_pgid, PIDTYPE_PGID, p) {
+			if (p->signal->session == current->signal->session) {
+				pgid = virt_pgid(p);
 				goto ok_pgid;
-		} while_each_task_pid(pgid, PIDTYPE_PGID, p);
+			}
+		} while_each_task_pid_ve(_pgid, PIDTYPE_PGID, p);
 		goto out;
 	}
 
 ok_pgid:
-	err = security_task_setpgid(p, pgid);
+	err = security_task_setpgid(p, _pgid);
 	if (err)
 		goto out;
 
-	if (process_group(p) != pgid) {
+	if (process_group(p) != _pgid) {
 		detach_pid(p, PIDTYPE_PGID);
-		p->signal->pgrp = pgid;
-		attach_pid(p, PIDTYPE_PGID, pgid);
+		p->signal->pgrp = _pgid;
+		set_virt_pgid(p, pgid);
+		attach_pid(p, PIDTYPE_PGID, _pgid);
+		if (atomic_read(&p->signal->count) != 1) {
+			task_t *t;
+			for (t = next_thread(p); t != p; t = next_thread(t)) {
+				set_virt_pgid(t, pgid);
+			}
+		}
 	}
 
 	err = 0;
@@ -1097,19 +1257,19 @@ out:
 asmlinkage long sys_getpgid(pid_t pid)
 {
 	if (!pid) {
-		return process_group(current);
+		return virt_pgid(current);
 	} else {
 		int retval;
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 
 		retval = -ESRCH;
 		if (p) {
 			retval = security_task_getpgid(p);
 			if (!retval)
-				retval = process_group(p);
+				retval = get_task_pgid(p);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
@@ -1121,7 +1281,7 @@ asmlinkage long sys_getpgid(pid_t pid)
 asmlinkage long sys_getpgrp(void)
 {
 	/* SMP - assuming writes are word atomic this is fine */
-	return process_group(current);
+	return virt_pgid(current);
 }
 
 #endif
@@ -1129,19 +1289,19 @@ asmlinkage long sys_getpgrp(void)
 asmlinkage long sys_getsid(pid_t pid)
 {
 	if (!pid) {
-		return current->signal->session;
+		return virt_sid(current);
 	} else {
 		int retval;
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 
 		retval = -ESRCH;
 		if(p) {
 			retval = security_task_getsid(p);
 			if (!retval)
-				retval = p->signal->session;
+				retval = get_task_sid(p);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
@@ -1165,9 +1325,19 @@ asmlinkage long sys_setsid(void)
 
 	current->signal->leader = 1;
 	__set_special_pids(current->pid, current->pid);
+	set_virt_pgid(current, virt_pid(current));
+	set_virt_sid(current, virt_pid(current));
 	current->signal->tty = NULL;
 	current->signal->tty_old_pgrp = 0;
-	err = process_group(current);
+	if (atomic_read(&current->signal->count) != 1) {
+		task_t *t;
+		for (t = next_thread(current); t != current; t = next_thread(t)) {
+			set_virt_pgid(t, virt_pid(current));
+			set_virt_sid(t, virt_pid(current));
+		}
+	}
+
+	err = virt_pgid(current);
 out:
 	write_unlock_irq(&tasklist_lock);
 	up(&tty_sem);
@@ -1446,7 +1616,7 @@ asmlinkage long sys_newuname(struct new_
 	int errno = 0;
 
 	down_read(&uts_sem);
-	if (copy_to_user(name,&system_utsname,sizeof *name))
+	if (copy_to_user(name,&ve_utsname,sizeof *name))
 		errno = -EFAULT;
 	up_read(&uts_sem);
 	return errno;
@@ -1457,15 +1627,15 @@ asmlinkage long sys_sethostname(char __u
 	int errno;
 	char tmp[__NEW_UTS_LEN];
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	if (len < 0 || len > __NEW_UTS_LEN)
 		return -EINVAL;
 	down_write(&uts_sem);
 	errno = -EFAULT;
 	if (!copy_from_user(tmp, name, len)) {
-		memcpy(system_utsname.nodename, tmp, len);
-		system_utsname.nodename[len] = 0;
+		memcpy(ve_utsname.nodename, tmp, len);
+		ve_utsname.nodename[len] = 0;
 		errno = 0;
 	}
 	up_write(&uts_sem);
@@ -1481,11 +1651,11 @@ asmlinkage long sys_gethostname(char __u
 	if (len < 0)
 		return -EINVAL;
 	down_read(&uts_sem);
-	i = 1 + strlen(system_utsname.nodename);
+	i = 1 + strlen(ve_utsname.nodename);
 	if (i > len)
 		i = len;
 	errno = 0;
-	if (copy_to_user(name, system_utsname.nodename, i))
+	if (copy_to_user(name, ve_utsname.nodename, i))
 		errno = -EFAULT;
 	up_read(&uts_sem);
 	return errno;
@@ -1502,7 +1672,7 @@ asmlinkage long sys_setdomainname(char _
 	int errno;
 	char tmp[__NEW_UTS_LEN];
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	if (len < 0 || len > __NEW_UTS_LEN)
 		return -EINVAL;
@@ -1510,8 +1680,8 @@ asmlinkage long sys_setdomainname(char _
 	down_write(&uts_sem);
 	errno = -EFAULT;
 	if (!copy_from_user(tmp, name, len)) {
-		memcpy(system_utsname.domainname, tmp, len);
-		system_utsname.domainname[len] = 0;
+		memcpy(ve_utsname.domainname, tmp, len);
+		ve_utsname.domainname[len] = 0;
 		errno = 0;
 	}
 	up_write(&uts_sem);
diff -Nurap linux-2.6.9-100.orig/kernel/sysctl.c linux-2.6.9-ve023stab054/kernel/sysctl.c
--- linux-2.6.9-100.orig/kernel/sysctl.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/sysctl.c	2011-06-15 19:26:22.000000000 +0400
@@ -25,6 +25,8 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
+#include <linux/ve_owner.h>
+#include <linux/ve.h>
 #include <linux/ctype.h>
 #include <linux/utsname.h>
 #include <linux/capability.h>
@@ -61,6 +63,7 @@ extern int sysctl_panic_on_oom;
 extern int max_threads;
 extern int sysrq_enabled;
 extern int core_uses_pid;
+extern int sysctl_at_vsyscall;
 extern int suid_dumpable;
 extern char core_pattern[];
 extern int cad_pid;
@@ -76,6 +79,21 @@ extern int sysctl_drop_caches;
 extern int write_mapped;
 extern int max_writeback_pages;
 
+#ifdef CONFIG_VE
+int glob_virt_pids = 1;
+EXPORT_SYMBOL(glob_virt_pids);
+int glob_ve_meminfo = 0;
+EXPORT_SYMBOL(glob_ve_meminfo);
+int ve_allow_kthreads;
+EXPORT_SYMBOL(ve_allow_kthreads);
+int ve_allow_init_signals = 1;
+EXPORT_SYMBOL(ve_allow_init_signals);
+#endif
+#ifdef CONFIG_MAGIC_SYSRQ
+extern int sysrq_key_scancode;
+#endif
+
+
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 int unknown_nmi_panic;
 extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
@@ -136,6 +154,11 @@ extern int msg_ctlmnb;
 extern int msg_ctlmni;
 extern int sem_ctls[];
 #endif
+#ifdef CONFIG_SCHED_VCPU
+extern u32 vcpu_sched_timeslice;
+extern u32 vcpu_timeslice;
+extern u32 vcpu_hot_timeslice;
+#endif
 
 #ifdef __sparc__
 extern char reboot_command [];
@@ -156,6 +179,7 @@ extern int sysctl_userprocess_debug;
 #endif
 
 extern int sysctl_hz_timer;
+int decode_call_traces = 1;
 
 #ifdef CONFIG_BSD_PROCESS_ACCT
 extern int acct_parm[];
@@ -165,10 +189,15 @@ extern int acct_parm[];
 extern int no_unaligned_warning;
 #endif
 
+#ifdef CONFIG_FAIRSCHED
+extern int fairsched_max_latency;
+extern int scale_vcpu_frequency;
+int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp,
+		        void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
 static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
 		       ctl_table *, void **);
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
-		  void __user *buffer, size_t *lenp, loff_t *ppos);
 
 static ctl_table root_table[];
 static struct ctl_table_header root_table_header =
@@ -188,6 +217,8 @@ extern ctl_table random_table[];
 extern ctl_table pty_table[];
 #endif
 
+extern int ve_area_access_check; /* fs/namei.c */
+
 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 int sysctl_legacy_va_layout;
 #endif
@@ -210,6 +241,8 @@ extern struct proc_dir_entry *proc_sys_r
 
 static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *);
 static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
+
+extern struct new_utsname virt_utsname;
 #endif
 
 /* Constants for minimum and maximum testing in vm_table.
@@ -489,7 +522,7 @@ static ctl_table kern_table[] = {
 		.ctl_name	= KERN_HOTPLUG,
 		.procname	= "hotplug",
 		.data		= &hotplug_path,
-		.maxlen		= KMOD_PATH_LEN,
+		.maxlen		= HOTPLUG_PATH_LEN,
 		.mode		= 0644,
 		.proc_handler	= &proc_dostring,
 		.strategy	= &sysctl_string,
@@ -582,6 +615,14 @@ static ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= KERN_SYSRQ_KEY_SCANCODE,
+		.procname	= "sysrq-key",
+		.data		= &sysrq_key_scancode,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{
 		.ctl_name	= KERN_CADPID,
@@ -674,6 +715,22 @@ static ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= KERN_VCPU_HOT_TIMESLICE,
+		.procname	= "vcpu_hot_timeslice",
+		.data		= &vcpu_hot_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_VE_MEMINFO,
+		.procname	= "ve_meminfo",
+		.data		= &glob_ve_meminfo,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{
 		.ctl_name	= KERN_PIDMAX,
@@ -773,6 +830,79 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+	{
+		.ctl_name	= KERN_SILENCE_LEVEL,
+		.procname	= "silence-level",
+		.data		= &console_silence_loglevel,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= KERN_ALLOC_FAIL_WARN,
+		.procname	= "alloc_fail_warn",
+		.data		= &alloc_fail_warn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+#ifdef CONFIG_FAIRSCHED
+	{
+		.ctl_name	= KERN_FAIRSCHED_MAX_LATENCY,
+		.procname	= "fairsched-max-latency",
+		.data		=  &fairsched_max_latency,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &fsch_sysctl_latency
+	},
+#endif
+#ifdef CONFIG_SCHED_VCPU
+	{
+		.ctl_name	= KERN_VCPU_SCHED_TIMESLICE,
+		.procname	= "vcpu_sched_timeslice",
+		.data		= &vcpu_sched_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_VCPU_TIMESLICE,
+		.procname	= "vcpu_timeslice",
+		.data		= &vcpu_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_VE
+	{
+		.ctl_name	= KERN_VIRT_PIDS,
+		.procname	= "virt_pids",
+		.data		= &glob_virt_pids,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
+	{
+		.ctl_name	= KERN_VIRT_OSRELEASE,
+		.procname	= "virt_osrelease",
+		.data		= virt_utsname.release,
+		.maxlen		= sizeof(virt_utsname.release),
+		.mode		= 0644,
+		.proc_handler	= &proc_doutsstring,
+		.strategy	= &sysctl_string,
+	},
+#ifdef CONFIG_FAIRSCHED
+	{
+		.ctl_name	= KERN_SCALE_VCPU_FREQUENCY,
+		.procname	= "scale_vcpu_frequency",
+		.data		= &scale_vcpu_frequency,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 
@@ -1178,10 +1308,26 @@ static ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= FS_AT_VSYSCALL,
+		.procname	= "vsyscall",
+		.data		= &sysctl_at_vsyscall,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 	{ .ctl_name = 0 }
 };
 
 static ctl_table debug_table[] = {
+	{
+		.ctl_name	= DBG_DECODE_CALLTRACES,
+		.procname	= "decode_call_traces",
+		.data		= &decode_call_traces,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 	{ .ctl_name = 0 }
 };
 
@@ -1246,6 +1392,7 @@ int do_sysctl(int __user *name, int nlen
 {
 	struct list_head *tmp;
 	int error = -ENOTDIR;
+	struct ve_struct *ve;
 
 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
 		return -ENOTDIR;
@@ -1254,13 +1401,18 @@ int do_sysctl(int __user *name, int nlen
 		if (!oldlenp || get_user(old_len, oldlenp))
 			return -EFAULT;
 	}
+	ve = get_exec_env();
 	spin_lock(&sysctl_lock);
-	tmp = &root_table_header.ctl_entry;
+	tmp = ve->sysctl_lh.next;
 	do {
-		struct ctl_table_header *head =
-			list_entry(tmp, struct ctl_table_header, ctl_entry);
+		struct ctl_table_header *head;
 		void *context = NULL;
 
+		if (tmp == &ve->sysctl_lh)
+			/* second pass over global variables */
+			tmp = &root_table_header.ctl_entry;
+
+		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
 		if (!use_table(head))
 			continue;
 
@@ -1315,10 +1467,14 @@ static int test_perm(int mode, int op)
 static inline int ctl_perm(ctl_table *table, int op)
 {
 	int error;
+	int mode = table->mode;
+
 	error = security_sysctl(table, op);
 	if (error)
 		return error;
-	return test_perm(table->mode, op);
+	if (!ve_accessible(table->owner_env, get_exec_env()))
+		mode &= ~0222; /* disable write access */
+	return test_perm(mode, op);
 }
 
 static int parse_table(int __user *name, int nlen,
@@ -1484,6 +1640,8 @@ struct ctl_table_header *register_sysctl
 					       int insert_at_head)
 {
 	struct ctl_table_header *tmp;
+	struct list_head *lh;
+
 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
 	if (!tmp)
 		return NULL;
@@ -1492,17 +1650,52 @@ struct ctl_table_header *register_sysctl
 	tmp->used = 0;
 	tmp->unregistering = NULL;
 	spin_lock(&sysctl_lock);
+#ifdef CONFIG_VE
+	lh = &get_exec_env()->sysctl_lh;
+#else
+	lh = &root_table_header.ctl_entry;
+#endif
 	if (insert_at_head)
-		list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
+		list_add(&tmp->ctl_entry, lh);
 	else
-		list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
+		list_add_tail(&tmp->ctl_entry, lh);
 	spin_unlock(&sysctl_lock);
 #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_VE
+	register_proc_table(table, get_exec_env()->proc_sys_root, tmp);
+#else
 	register_proc_table(table, proc_sys_root, tmp);
 #endif
+#endif
 	return tmp;
 }
 
+void free_sysctl_clone(ctl_table *clone)
+{
+	kfree(clone);
+}
+
+ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr)
+{
+	int i;
+	ctl_table *clone;
+
+	clone = kmalloc(nr * sizeof(ctl_table), GFP_KERNEL);
+	if (clone == NULL)
+		return NULL;
+
+	memcpy(clone, tmpl, nr * sizeof(ctl_table));
+	for (i = 0; i < nr; i++) {
+		if (tmpl[i].ctl_name == 0)
+			continue;
+		clone[i].owner_env = get_exec_env();
+		if (tmpl[i].child == NULL)
+			continue;
+		clone[i].child = clone + (tmpl[i].child - tmpl);
+	}
+	return clone;
+}
+
 /**
  * unregister_sysctl_table - unregister a sysctl table hierarchy
  * @header: the header returned from register_sysctl_table
@@ -1516,8 +1709,12 @@ void unregister_sysctl_table(struct ctl_
 	spin_lock(&sysctl_lock);
 	start_unregistering(header);
 #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_VE
+	unregister_proc_table(header->ctl_table, get_exec_env()->proc_sys_root);
+#else
 	unregister_proc_table(header->ctl_table, proc_sys_root);
 #endif
+#endif
 	spin_unlock(&sysctl_lock);
 	kfree(header);
 }
@@ -1604,10 +1801,6 @@ static void unregister_proc_table(ctl_ta
 		 */
 		de->data = NULL;
 
-		/* Don't unregister proc entries that are still being used.. */
-		if (atomic_read(&de->count))
-			continue;
-
 		table->de = NULL;
 		remove_proc_entry(table->procname, root);
 	}
@@ -1748,7 +1941,7 @@ int proc_dostring(ctl_table *table, int 
  *	to observe. Should this be in kernel/sys.c ????
  */
  
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+int proc_doutsstring(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int r;
@@ -2272,7 +2465,7 @@ int proc_dostring(ctl_table *table, int 
 	return -ENOSYS;
 }
 
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+int proc_doutsstring(ctl_table *table, int write, struct file *filp,
 			    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
@@ -2325,7 +2518,6 @@ int proc_doulongvec_ms_jiffies_minmax(ct
 
 #endif /* CONFIG_PROC_FS */
 
-
 /*
  * General sysctl support routines 
  */
@@ -2527,6 +2719,14 @@ void unregister_sysctl_table(struct ctl_
 {
 }
 
+ctl_table * clone_sysctl_template(ctl_table *tmpl, int nr)
+{
+	return NULL;
+}
+
+void free_sysctl_clone(ctl_table *tmpl)
+{
+}
 #endif /* CONFIG_SYSCTL */
 
 /*
@@ -2538,9 +2738,12 @@ EXPORT_SYMBOL(proc_dointvec_jiffies);
 EXPORT_SYMBOL(proc_dointvec_minmax);
 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
 EXPORT_SYMBOL(proc_dostring);
+EXPORT_SYMBOL(proc_doutsstring);
 EXPORT_SYMBOL(proc_doulongvec_minmax);
 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
 EXPORT_SYMBOL(register_sysctl_table);
+EXPORT_SYMBOL(clone_sysctl_template);
+EXPORT_SYMBOL(free_sysctl_clone);
 EXPORT_SYMBOL(sysctl_intvec);
 EXPORT_SYMBOL(sysctl_jiffies);
 EXPORT_SYMBOL(sysctl_string);
diff -Nurap linux-2.6.9-100.orig/kernel/timer.c linux-2.6.9-ve023stab054/kernel/timer.c
--- linux-2.6.9-100.orig/kernel/timer.c	2011-06-09 19:23:04.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/timer.c	2011-06-15 19:26:22.000000000 +0400
@@ -33,6 +33,7 @@
 #include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/diskdump.h>
+#include <linux/virtinfo.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -242,7 +243,7 @@ void add_timer_on(struct timer_list *tim
 	spin_unlock_irqrestore(&base->lock, flags);
 }
 
-EXPORT_SYMBOL_GPL(add_timer_on);
+EXPORT_SYMBOL(add_timer_on);
 
 /***
  * mod_timer - modify a timer's timeout
@@ -460,6 +461,7 @@ repeat:
 		if (!list_empty(head)) {
 			void (*fn)(unsigned long);
 			unsigned long data;
+			struct ve_struct *envid;
 
 			timer = list_entry(head->next,struct timer_list,entry);
  			fn = timer->function;
@@ -472,9 +474,11 @@ repeat:
 				    * would cause a race with list_add
 				    */
 			timer->base = NULL;
+			envid = set_exec_env(get_ve0());
 			spin_unlock_irqrestore(&base->lock, flags);
 			fn(data);
 			spin_lock_irq(&base->lock);
+			(void)set_exec_env(envid);
 			goto repeat;
 		}
 	}
@@ -893,6 +897,22 @@ static unsigned long count_active_tasks(
  */
 unsigned long avenrun[3];
 
+static void calc_load_ve(void)
+{
+	unsigned long flags, nr_unint;
+
+	nr_unint = nr_uninterruptible() * FIXED_1;
+	spin_lock_irqsave(&kstat_glb_lock, flags);
+	CALC_LOAD(kstat_glob.nr_unint_avg[0], EXP_1, nr_unint);
+	CALC_LOAD(kstat_glob.nr_unint_avg[1], EXP_5, nr_unint);
+	CALC_LOAD(kstat_glob.nr_unint_avg[2], EXP_15, nr_unint);
+	spin_unlock_irqrestore(&kstat_glb_lock, flags);
+
+#ifdef CONFIG_VE
+	do_update_load_avg_ve();
+#endif
+}
+
 /*
  * calc_load - given tick count, update the avenrun load estimates.
  * This is called while holding a write_lock on xtime_lock.
@@ -909,6 +929,7 @@ static inline void calc_load(unsigned lo
 		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+		calc_load_ve();
 	}
 }
 
@@ -1020,7 +1041,7 @@ asmlinkage unsigned long sys_alarm(unsig
  */
 asmlinkage long sys_getpid(void)
 {
-	return current->tgid;
+	return virt_tgid(current);
 }
 
 /*
@@ -1042,28 +1063,15 @@ asmlinkage long sys_getpid(void)
 asmlinkage long sys_getppid(void)
 {
 	int pid;
-	struct task_struct *me = current;
-	struct task_struct *parent;
 
-	parent = me->group_leader->real_parent;
-	for (;;) {
-		pid = parent->tgid;
-#ifdef CONFIG_SMP
-{
-		struct task_struct *old = parent;
-
-		/*
-		 * Make sure we read the pid before re-reading the
-		 * parent pointer:
-		 */
-		rmb();
-		parent = me->group_leader->real_parent;
-		if (old != parent)
-			continue;
-}
-#endif
-		break;
-	}
+	/* Some smart code used to be here. It was wrong.
+	 * ->real_parent could be released before dereference and
+	 * we accessed freed kernel memory, which faults with debugging on.
+	 * Keep it simple and stupid.
+	 */
+	read_lock(&tasklist_lock);
+	pid = virt_tgid(current->group_leader->real_parent);
+	read_unlock(&tasklist_lock);
 	return pid;
 }
 
@@ -1205,10 +1213,10 @@ EXPORT_SYMBOL(schedule_timeout_uninterru
 /* Thread ID - the internal kernel "pid" */
 asmlinkage long sys_gettid(void)
 {
-	return current->pid;
+	return virt_pid(current);
 }
 
-static long __sched nanosleep_restart(struct restart_block *restart)
+long __sched nanosleep_restart(struct restart_block *restart)
 {
 	unsigned long expire = restart->arg0, now = jiffies;
 	struct timespec __user *rmtp = (struct timespec __user *) restart->arg1;
@@ -1233,6 +1241,7 @@ static long __sched nanosleep_restart(st
 	}
 	return ret;
 }
+EXPORT_SYMBOL(nanosleep_restart);
 
 asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
 {
@@ -1275,11 +1284,12 @@ asmlinkage long sys_sysinfo(struct sysin
 	unsigned long mem_total, sav_total;
 	unsigned int mem_unit, bitcount;
 	unsigned long seq;
+	unsigned long *__avenrun;
+	struct timespec tp;
 
 	memset((char *)&val, 0, sizeof(struct sysinfo));
 
 	do {
-		struct timespec tp;
 		seq = read_seqbegin(&xtime_lock);
 
 		/*
@@ -1296,18 +1306,34 @@ asmlinkage long sys_sysinfo(struct sysin
 			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
 			tp.tv_sec++;
 		}
-		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
-
-		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+	} while (read_seqretry(&xtime_lock, seq));
 
+	if (ve_is_super(get_exec_env())) {
+		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+		__avenrun = &avenrun[0];
 		val.procs = nr_threads;
-	} while (read_seqretry(&xtime_lock, seq));
+	}
+#ifdef CONFIG_VE
+	else {
+		struct ve_struct *ve;
+		ve = get_exec_env();
+		__avenrun = &ve->avenrun[0];
+		val.procs = atomic_read(&ve->pcounter);
+		val.uptime = tp.tv_sec - ve->start_timespec.tv_sec;
+	}
+#endif
+	val.loads[0] = __avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+	val.loads[1] = __avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+	val.loads[2] = __avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
 
 	si_meminfo(&val);
 	si_swapinfo(&val);
 
+#ifdef CONFIG_USER_RESOURCE
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_SYSINFO, &val)
+			& NOTIFY_FAIL)
+		return -ENOMSG;
+#endif
 	/*
 	 * If the sum of all the available memory (i.e. ram + swap)
 	 * is less than can be stored in a 32 bit unsigned long then
diff -Nurap linux-2.6.9-100.orig/kernel/ub/Kconfig linux-2.6.9-ve023stab054/kernel/ub/Kconfig
--- linux-2.6.9-100.orig/kernel/ub/Kconfig	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/Kconfig	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,89 @@
+#
+# User resources part (UBC)
+#
+# Copyright (C) 2005  SWsoft
+# All rights reserved.
+#
+# Licensing governed by "linux/COPYING.SWsoft" file.
+
+menu "User resources"
+
+config USER_RESOURCE
+	bool "Enable user resource accounting"
+	default y
+	help 
+          This patch provides accounting and allows configuring
+          limits for user's consumption of exhaustible system resources.
+          The most important resource controlled by this patch is unswappable 
+          memory (either mlock'ed or used by internal kernel structures and 
+          buffers). The main goal of this patch is to protect processes
+          from running short of important resources because of an accidental
+          misbehavior of processes or malicious activity aiming to ``kill'' 
+          the system. It is worth mentioning that resource limits configured
+          by setrlimit(2) do not give an acceptable level of protection 
+          because they cover only a small fraction of resources and work on a
+          per-process basis.  Per-process accounting doesn't prevent malicious
+          users from spawning a lot of resource-consuming processes.
+
+config USER_RSS_ACCOUNTING
+	bool "Account physical memory usage"
+	default y
+	depends on USER_RESOURCE
+	help
+          This allows estimating per-beancounter physical memory usage.
+          The implemented algorithm accounts shared pages of memory as well,
+          dividing them by the number of beancounters which use the page.
+
+config USER_SWAP_ACCOUNTING
+	bool "Account swap usage"
+	default y
+	depends on USER_RESOURCE
+	help
+          This allows accounting of swap usage.
+
+config USER_RESOURCE_PROC
+	bool "Report resource usage in /proc"
+	default y
+	depends on USER_RESOURCE
+	help
+          Allows a system administrator to inspect resource accounts and limits.
+
+config UBC_DEBUG
+	bool "User resources debug features"
+	default n
+	depends on USER_RESOURCE
+	help
+	  Enables setting up debug features for user resource accounting
+
+config UBC_DEBUG_KMEM
+	bool "Debug kmemsize with cache counters"
+	default n
+	depends on UBC_DEBUG
+	help
+	  Adds /proc/user_beancounters_debug entry to get statistics
+	  about cache usage of each beancounter
+
+config UBC_KEEP_UNUSED
+	bool "Keep unused beancounter alive"
+	default y
+	depends on UBC_DEBUG
+	help
+	  If on, unused beancounters are kept on the hash and maxheld value
+	  can be looked through.
+
+config UBC_DEBUG_ITEMS
+	bool "Account resources in items rather than in bytes"
+	default y
+	depends on UBC_DEBUG
+	help
+	  When true some of the resources (e.g. kmemsize) are accounted
+	  in items instead of bytes.
+
+config UBC_UNLIMITED
+	bool "Use unlimited ubc settings"
+	default y
+	depends on UBC_DEBUG
+	help
+	  When ON all limits and barriers are set to max values.
+
+endmenu
diff -Nurap linux-2.6.9-100.orig/kernel/ub/Makefile linux-2.6.9-ve023stab054/kernel/ub/Makefile
--- linux-2.6.9-100.orig/kernel/ub/Makefile	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/Makefile	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,20 @@
+#
+# User resources part (UBC)
+#
+# Copyright (C) 2005  SWsoft
+# All rights reserved.
+#
+# Licensing governed by "linux/COPYING.SWsoft" file.
+
+obj-y := ub_sys.o
+obj-$(CONFIG_USER_RESOURCE) += beancounter.o
+obj-$(CONFIG_USER_RESOURCE) += ub_dcache.o
+obj-$(CONFIG_USER_RESOURCE) += ub_mem.o
+obj-$(CONFIG_USER_RESOURCE) += ub_misc.o
+obj-$(CONFIG_USER_RESOURCE) += ub_net.o
+obj-$(CONFIG_USER_RESOURCE) += ub_pages.o
+obj-$(CONFIG_USER_RESOURCE) += ub_stat.o
+obj-$(CONFIG_USER_RESOURCE) += ub_oom.o
+
+obj-$(CONFIG_USER_RSS_ACCOUNTING) += ub_page_bc.o
+obj-$(CONFIG_USER_RESOURCE_PROC)  += ub_proc.o
diff -Nurap linux-2.6.9-100.orig/kernel/ub/beancounter.c linux-2.6.9-ve023stab054/kernel/ub/beancounter.c
--- linux-2.6.9-100.orig/kernel/ub/beancounter.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/beancounter.c	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,684 @@
+/*
+ *  linux/kernel/ub/beancounter.c
+ *
+ *  Copyright (C) 1998  Alan Cox
+ *                1998-2000  Andrey V. Savochkin <saw@saw.sw.com.sg>
+ *  Copyright (C) 2000-2005 SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * TODO:
+ *   - more intelligent limit check in mremap(): currently the new size is
+ *     charged and _then_ old size is uncharged
+ *     (almost done: !move_vma case is completely done,
+ *      move_vma in its current implementation requires too many conditions to
+ *      do things right, because it may be not only expansion, but shrinking
+ *      also, plus do_munmap will require an additional parameter...)
+ *   - problem: bad pmd page handling
+ *   - consider /proc redesign
+ *   - TCP/UDP ports
+ *   + consider whether __charge_beancounter_locked should be inline
+ *
+ * Changes:
+ *   1999/08/17  Marcelo Tosatti <marcelo@conectiva.com.br>
+ *	- Set "barrier" and "limit" parts of limits atomically.
+ *   1999/10/06  Marcelo Tosatti <marcelo@conectiva.com.br>
+ *	- setublimit system call.
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_vmpages.h>
+
+static kmem_cache_t *ub_cachep;
+static struct user_beancounter default_beancounter;
+struct user_beancounter ub0;
+
+const char *ub_rnames[] = {
+	"kmemsize",	/* 0 */
+	"lockedpages",
+	"privvmpages",
+	"shmpages",
+	"dummy",
+	"numproc",	/* 5 */
+	"physpages",
+	"vmguarpages",
+	"oomguarpages",
+	"numtcpsock",
+	"numflock",	/* 10 */
+	"numpty",
+	"numsiginfo",
+	"tcpsndbuf",
+	"tcprcvbuf",
+	"othersockbuf",	/* 15 */
+	"dgramrcvbuf",
+	"numothersock",
+	"dcachesize",
+	"numfile",
+	"dummy",	/* 20 */
+	"dummy",
+	"dummy",
+	"numiptent",
+	"unused_privvmpages",	/* UB_RESOURCES */
+	"tmpfs_respages",
+	"swap_pages",
+	"held_pages",
+};
+
+static void init_beancounter_struct(struct user_beancounter *ub);
+static void init_beancounter_store(struct user_beancounter *ub);
+static void init_beancounter_nolimits(struct user_beancounter *ub);
+
+void print_ub_uid(struct user_beancounter *ub, char *buf, int size)
+{
+	if (ub->parent != NULL)
+		snprintf(buf, size, "%u.%u", ub->parent->ub_uid, ub->ub_uid);
+	else
+		snprintf(buf, size, "%u", ub->ub_uid);
+}
+EXPORT_SYMBOL(print_ub_uid);
+
+#define ub_hash_fun(x) ((((x) >> 8) ^ (x)) & (UB_HASH_SIZE - 1))
+#define ub_subhash_fun(p, id) ub_hash_fun((p)->ub_uid + (id) * 17)
+struct ub_hash_slot ub_hash[UB_HASH_SIZE];
+spinlock_t ub_hash_lock;
+EXPORT_SYMBOL(ub_hash);
+EXPORT_SYMBOL(ub_hash_lock);
+
+/*
+ *	Per user resource beancounting. Resources are tied to their luid.
+ *	The resource structure itself is tagged both to the process and
+ *	the charging resources (a socket doesn't want to have to search for
+ *	things at irq time for example). Reference counters keep things in
+ *	hand.
+ *
+ *	The case where a user creates resource, kills all his processes and
+ *	then starts new ones is correctly handled this way. The refcounters
+ *	will mean the old entry is still around with resource tied to it.
+ */
+struct user_beancounter *get_beancounter_byuid(uid_t uid, int create)
+{
+	struct user_beancounter *new_ub, *ub;
+	unsigned long flags;
+	struct ub_hash_slot *slot;
+
+	slot = &ub_hash[ub_hash_fun(uid)];
+	new_ub = NULL;
+
+retry:
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	ub = slot->ubh_beans;
+	while (ub != NULL && (ub->ub_uid != uid || ub->parent != NULL))
+		ub = ub->ub_next;
+
+	if (ub != NULL) {
+		/* found */
+		get_beancounter(ub);
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		if (new_ub != NULL)
+			kmem_cache_free(ub_cachep, new_ub);
+		return ub;
+	}
+
+	if (!create) {
+		/* no ub found */
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return NULL;
+	}
+
+	if (new_ub != NULL) {
+		/* install new ub */
+		new_ub->ub_next = slot->ubh_beans;
+		slot->ubh_beans = new_ub;
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return new_ub;
+	}
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	/* alloc new ub */
+	new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, 
+			GFP_KERNEL);
+	if (new_ub == NULL)
+		return NULL;
+
+	ub_debug(UBD_ALLOC, "Creating ub %p in slot %p\n", new_ub, slot);
+	memcpy(new_ub, &default_beancounter, sizeof(*new_ub));
+	init_beancounter_struct(new_ub);
+	new_ub->ub_uid = uid;
+	goto retry;
+}
+EXPORT_SYMBOL(get_beancounter_byuid);
+
+struct user_beancounter *get_subbeancounter_byid(struct user_beancounter *p,
+		int id, int create)
+{
+	struct user_beancounter *new_ub, *ub;
+	unsigned long flags;
+	struct ub_hash_slot *slot;
+
+	slot = &ub_hash[ub_subhash_fun(p, id)];
+	new_ub = NULL;
+
+retry:
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	ub = slot->ubh_beans;
+	while (ub != NULL && (ub->parent != p || ub->ub_uid != id))
+		ub = ub->ub_next;
+
+	if (ub != NULL) {
+		/* found */
+		get_beancounter(ub);
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		if (new_ub != NULL) {
+			put_beancounter(new_ub->parent);
+			kmem_cache_free(ub_cachep, new_ub);
+		}
+		return ub;
+	}
+
+	if (!create) {
+		/* no ub found */
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return NULL;
+	}
+
+	if (new_ub != NULL) {
+		/* install new ub */
+		get_beancounter(new_ub);
+		new_ub->ub_next = slot->ubh_beans;
+		slot->ubh_beans = new_ub;
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return new_ub;
+	}
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	/* alloc new ub */
+	new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, 
+			GFP_KERNEL);
+	if (new_ub == NULL)
+		return NULL;
+
+	ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", new_ub, slot);
+	memset(new_ub, 0, sizeof(*new_ub));
+	init_beancounter_nolimits(new_ub);
+	init_beancounter_store(new_ub);
+	init_beancounter_struct(new_ub);
+	atomic_set(&new_ub->ub_refcount, 0);
+	new_ub->ub_uid = id;
+	new_ub->parent = get_beancounter(p);
+	goto retry;
+}
+EXPORT_SYMBOL(get_subbeancounter_byid);
+
+struct user_beancounter *subbeancounter_findcreate(struct user_beancounter *p,
+		int id)
+{
+	struct user_beancounter *ub;
+	unsigned long flags;
+	struct ub_hash_slot *slot;
+
+	slot = &ub_hash[ub_subhash_fun(p, id)];
+
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	ub = slot->ubh_beans;
+	while (ub != NULL && (ub->parent != p || ub->ub_uid != id))
+		ub = ub->ub_next;
+
+	if (ub != NULL) {
+		/* found */
+		get_beancounter(ub);
+		goto done;
+	}
+
+	/* alloc new ub */
+	/* Can be called from non-atomic contexts. Den */
+	ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, GFP_ATOMIC);
+	if (ub == NULL)
+		goto done;
+
+	ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", ub, slot);
+	memset(ub, 0, sizeof(*ub));
+	init_beancounter_nolimits(ub);
+	init_beancounter_store(ub);
+	init_beancounter_struct(ub);
+	atomic_set(&ub->ub_refcount, 0);
+	ub->ub_uid = id;
+	ub->parent = get_beancounter(p);
+
+	/* install new ub */
+	get_beancounter(ub);
+	ub->ub_next = slot->ubh_beans;
+	slot->ubh_beans = ub;
+
+done:
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+	return ub;
+}
+EXPORT_SYMBOL(subbeancounter_findcreate);
+#ifndef CONFIG_UBC_KEEP_UNUSED
+
+static int verify_res(struct user_beancounter *ub, int resource,
+		unsigned long held)
+{
+	char id[64];
+
+	if (likely(held == 0))
+		return 1;
+
+	print_ub_uid(ub, id, sizeof(id));
+	printk(KERN_WARNING "Ub %s helds %lu in %s on put\n",
+			id, held, ub_rnames[resource]);
+	return 0;
+}
+
+static inline void verify_held(struct user_beancounter *ub)
+{
+	int i, clean;
+
+	clean = 1;
+	for (i = 0; i < UB_RESOURCES; i++)
+		clean &= verify_res(ub, i, ub->ub_parms[i].held);
+
+	clean &= verify_res(ub, UB_UNUSEDPRIVVM, ub->ub_unused_privvmpages);
+	clean &= verify_res(ub, UB_TMPFSPAGES, ub->ub_tmpfs_respages);
+	clean &= verify_res(ub, UB_SWAPPAGES, ub->ub_swap_pages);
+	clean &= verify_res(ub, UB_HELDPAGES, (unsigned long)ub->ub_held_pages);
+
+	ub_debug_trace(!clean, 5, 60*HZ);
+}
+
+static void __unhash_beancounter(struct user_beancounter *ub)
+{
+	struct user_beancounter **ubptr;
+	struct ub_hash_slot *slot;
+
+	if (ub->parent != NULL)
+		slot = &ub_hash[ub_subhash_fun(ub->parent, ub->ub_uid)];
+	else
+	       	slot = &ub_hash[ub_hash_fun(ub->ub_uid)];
+	ubptr = &slot->ubh_beans;
+
+	while (*ubptr != NULL) {
+		if (*ubptr == ub) {
+			verify_held(ub);
+			*ubptr = ub->ub_next;
+			return;
+		}
+		ubptr = &((*ubptr)->ub_next);
+	}
+	printk(KERN_ERR "Invalid beancounter %p, luid=%d on free, slot %p\n",
+			ub, ub->ub_uid, slot);
+}
+#endif
+
+static void put_warn(struct user_beancounter *ub)
+{
+	char id[64];
+
+	print_ub_uid(ub, id, sizeof(id));
+	printk(KERN_ERR "UB: Bad refcount (%d) on put of %s (%p)\n",
+			atomic_read(&ub->ub_refcount), id, ub);
+}
+
+void __put_beancounter(struct user_beancounter *ub)
+{
+	unsigned long flags;
+	struct user_beancounter *parent;
+
+again:
+	parent = ub->parent;
+	ub_debug(UBD_ALLOC, "__put bc %p (cnt %d) for %.20s pid %d "
+			"cur %08lx cpu %d.\n",
+			ub, atomic_read(&ub->ub_refcount), 
+			current->comm, current->pid, 
+			(unsigned long)current, smp_processor_id());
+
+	/* equivalent to atomic_dec_and_lock_irqsave() */
+	local_irq_save(flags);
+	if (likely(!atomic_dec_and_lock(&ub->ub_refcount, &ub_hash_lock))) {
+		if (unlikely(atomic_read(&ub->ub_refcount) < 0))
+			put_warn(ub);
+		local_irq_restore(flags);
+		return;
+	}
+
+	if (unlikely(ub == get_ub0())) {
+		printk(KERN_ERR "Trying to put ub0\n");
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return;
+	}
+
+#ifndef CONFIG_UBC_KEEP_UNUSED
+	__unhash_beancounter(ub);
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+	ub_free_counters(ub);
+	kmem_cache_free(ub_cachep, ub);
+#else
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+#endif
+	ub = parent;
+	if (ub != NULL)
+		goto again;
+}
+EXPORT_SYMBOL(__put_beancounter);
+
+/*
+ *	Generic resource charging stuff
+ */
+
+int __charge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val, enum ub_severity strict)
+{
+	ub_debug_resource(resource, "Charging %lu for %d of %p with %lu\n",
+			val, resource, ub, ub->ub_parms[resource].held);
+	/*
+	 * ub_value <= UB_MAXVALUE, value <= UB_MAXVALUE, and only one addition
+	 * at the moment is possible so an overflow is impossible.  
+	 */
+	ub->ub_parms[resource].held += val;
+
+	switch (strict) {
+		case UB_HARD:
+			if (ub->ub_parms[resource].held >
+					ub->ub_parms[resource].barrier)
+				break;
+		case UB_SOFT:
+			if (ub->ub_parms[resource].held >
+					ub->ub_parms[resource].limit)
+				break;
+		case UB_FORCE:
+			ub_adjust_maxheld(ub, resource);
+			return 0;
+		default:
+			BUG();
+	}
+
+	if (strict == UB_SOFT && ub_ratelimit(&ub->ub_limit_rl))
+		printk(KERN_INFO "Fatal resource shortage: %s, UB %d.\n",
+		       ub_rnames[resource], ub->ub_uid);
+	ub->ub_parms[resource].failcnt++;
+	ub->ub_parms[resource].held -= val;
+	return -ENOMEM;
+}
+
+int charge_beancounter(struct user_beancounter *ub,
+		int resource, unsigned long val, enum ub_severity strict)
+{
+	int retval;
+	struct user_beancounter *p, *q;
+	unsigned long flags;
+
+	retval = -EINVAL;
+	if (val > UB_MAXVALUE)
+		goto out;
+
+	local_irq_save(flags);
+	for (p = ub; p != NULL; p = p->parent) {
+		spin_lock(&p->ub_lock);
+		retval = __charge_beancounter_locked(p, resource, val, strict);
+		spin_unlock(&p->ub_lock);
+		if (retval)
+			goto unroll;
+	}
+out_restore:
+	local_irq_restore(flags);
+out:
+	return retval;
+
+unroll:
+	for (q = ub; q != p; q = q->parent) {
+		spin_lock(&q->ub_lock);
+		__uncharge_beancounter_locked(q, resource, val);
+		spin_unlock(&q->ub_lock);
+	}
+	goto out_restore;
+}
+
+EXPORT_SYMBOL(charge_beancounter);
+
+void __charge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	struct user_beancounter *p;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	for (p = ub; p->parent != NULL; p = p->parent) {
+		spin_lock(&p->ub_lock);
+		__charge_beancounter_locked(p, resource, val, UB_FORCE);
+		spin_unlock(&p->ub_lock);
+	}
+	local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(__charge_beancounter_notop);
+
+void uncharge_warn(struct user_beancounter *ub, int resource,
+		unsigned long val, unsigned long held)
+{
+	char id[64];
+
+	print_ub_uid(ub, id, sizeof(id));
+	printk(KERN_ERR "Uncharging too much %lu h %lu, res %s ub %s\n",
+			val, held, ub_rnames[resource], id);
+	ub_debug_trace(1, 10, 10*HZ);
+}
+
+void __uncharge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	ub_debug_resource(resource, "Uncharging %lu for %d of %p with %lu\n",
+			val, resource, ub, ub->ub_parms[resource].held);
+	if (ub->ub_parms[resource].held < val) {
+		uncharge_warn(ub, resource,
+				val, ub->ub_parms[resource].held);
+		val = ub->ub_parms[resource].held;
+	}
+	ub->ub_parms[resource].held -= val;
+}
+
+void uncharge_beancounter(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	unsigned long flags;
+	struct user_beancounter *p;
+
+	for (p = ub; p != NULL; p = p->parent) {
+		spin_lock_irqsave(&p->ub_lock, flags);
+		__uncharge_beancounter_locked(p, resource, val);
+		spin_unlock_irqrestore(&p->ub_lock, flags);
+	}
+}
+
+EXPORT_SYMBOL(uncharge_beancounter);
+
+void __uncharge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	struct user_beancounter *p;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	for (p = ub; p->parent != NULL; p = p->parent) {
+		spin_lock(&p->ub_lock);
+		__uncharge_beancounter_locked(p, resource, val);
+		spin_unlock(&p->ub_lock);
+	}
+	local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(__uncharge_beancounter_notop);
+
+
+/*
+ *	Rate limiting stuff.
+ */
+int ub_ratelimit(struct ub_rate_info *p)
+{
+	unsigned long cjif, djif;
+	unsigned long flags;
+	static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
+	long new_bucket;
+
+	spin_lock_irqsave(&ratelimit_lock, flags);
+	cjif = jiffies;
+	djif = cjif - p->last;
+	if (djif < p->interval) {
+		if (p->bucket >= p->burst) {
+			spin_unlock_irqrestore(&ratelimit_lock, flags);
+			return 0;
+		}
+		p->bucket++;
+	} else {
+		new_bucket = p->bucket - (djif / (unsigned)p->interval);
+		if (new_bucket < 0)
+			new_bucket = 0;
+		p->bucket = new_bucket + 1;
+	}
+	p->last = cjif;
+	spin_unlock_irqrestore(&ratelimit_lock, flags);
+	return 1;
+}
+EXPORT_SYMBOL(ub_ratelimit);
+
+
+/*
+ *	Initialization
+ *
+ *	struct user_beancounter contains
+ *	 - limits and other configuration settings,
+ *	   with a copy stored for accounting purposes,
+ *	 - structural fields: lists, spinlocks and so on.
+ *
+ *	Before these parts are initialized, the structure should be memset
+ *	to 0 or copied from a known clean structure.  That takes care of a lot
+ *	of fields not initialized explicitly.
+ */
+
+static void init_beancounter_struct(struct user_beancounter *ub)
+{
+	ub->ub_magic = UB_MAGIC;
+	atomic_set(&ub->ub_refcount, 1);
+	spin_lock_init(&ub->ub_lock);
+	INIT_LIST_HEAD(&ub->ub_tcp_sk_list);
+	INIT_LIST_HEAD(&ub->ub_other_sk_list);
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	INIT_LIST_HEAD(&ub->ub_cclist);
+#endif
+}
+
+static void init_beancounter_store(struct user_beancounter *ub)
+{
+	int k;
+
+	for (k = 0; k < UB_RESOURCES; k++) {
+		memcpy(&ub->ub_store[k], &ub->ub_parms[k],
+				sizeof(struct ubparm));
+	}
+}
+
+static void init_beancounter_nolimits(struct user_beancounter *ub)
+{
+	int k;
+
+	for (k = 0; k < UB_RESOURCES; k++) {
+		ub->ub_parms[k].limit = UB_MAXVALUE;
+		/* FIXME: whether this is right for physpages and guarantees? */
+		ub->ub_parms[k].barrier = UB_MAXVALUE;
+	}
+
+	/* FIXME: set unlimited rate? */
+	ub->ub_limit_rl.burst = 4;
+	ub->ub_limit_rl.interval = 300*HZ;
+}
+
+static void init_beancounter_syslimits(struct user_beancounter *ub,
+		unsigned long mp)
+{
+	extern int max_threads;
+	int k;
+
+	ub->ub_parms[UB_KMEMSIZE].limit = 
+		mp > (192*1024*1024 >> PAGE_SHIFT) ?
+				32*1024*1024 : (mp << PAGE_SHIFT) / 6;
+	ub->ub_parms[UB_LOCKEDPAGES].limit = 8;
+	ub->ub_parms[UB_PRIVVMPAGES].limit = UB_MAXVALUE;
+	ub->ub_parms[UB_SHMPAGES].limit = 64;
+	ub->ub_parms[UB_NUMPROC].limit = max_threads / 2;
+	ub->ub_parms[UB_NUMTCPSOCK].limit = 1024;
+	ub->ub_parms[UB_TCPSNDBUF].limit = 1024*4*1024; /* 4k per socket */
+	ub->ub_parms[UB_TCPRCVBUF].limit = 1024*6*1024; /* 6k per socket */
+	ub->ub_parms[UB_NUMOTHERSOCK].limit = 256;
+	ub->ub_parms[UB_DGRAMRCVBUF].limit = 256*4*1024; /* 4k per socket */
+	ub->ub_parms[UB_OTHERSOCKBUF].limit = 256*8*1024; /* 8k per socket */
+	ub->ub_parms[UB_NUMFLOCK].limit = 1024;
+	ub->ub_parms[UB_NUMPTY].limit = 16;
+	ub->ub_parms[UB_NUMSIGINFO].limit = 1024;
+	ub->ub_parms[UB_DCACHESIZE].limit = 1024*1024;
+	ub->ub_parms[UB_NUMFILE].limit = 1024;
+
+	for (k = 0; k < UB_RESOURCES; k++)
+		ub->ub_parms[k].barrier = ub->ub_parms[k].limit;
+
+	ub->ub_limit_rl.burst = 4;
+	ub->ub_limit_rl.interval = 300*HZ;
+}
+
+void __init ub0_init(void)
+{
+	struct user_beancounter *ub;
+
+	init_cache_counters();
+	ub = get_ub0();
+	memset(ub, 0, sizeof(*ub));
+	ub->ub_uid = 0;
+	init_beancounter_nolimits(ub);
+	init_beancounter_store(ub);
+	init_beancounter_struct(ub);
+
+	memset(task_bc(current), 0, sizeof(struct task_beancounter));
+	(void)set_exec_ub(ub);
+	task_bc(current)->task_ub = get_beancounter(ub);
+	__charge_beancounter_locked(ub, UB_NUMPROC, 1, UB_FORCE);
+	task_bc(current)->fork_sub = get_beancounter(ub);
+	ub_init_task_bc(&current->task_bc);
+	mm_ub(&init_mm) = get_beancounter(ub);
+}
+
+void __init ub_hash_init(void)
+{
+	struct ub_hash_slot *slot;
+
+	spin_lock_init(&ub_hash_lock);
+	/* insert ub0 into the hash */
+	slot = &ub_hash[ub_hash_fun(get_ub0()->ub_uid)];
+	slot->ubh_beans = get_ub0();
+}
+
+void __init beancounter_init(unsigned long mempages)
+{
+	extern int skbc_cache_init(void);
+	int res;
+
+	res = skbc_cache_init();
+	ub_cachep = kmem_cache_create("user_beancounters",
+			sizeof(struct user_beancounter),
+			0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (res < 0 || ub_cachep == NULL)
+		panic("Can't create ubc caches\n");
+
+	memset(&default_beancounter, 0, sizeof(default_beancounter));
+#ifdef CONFIG_UBC_UNLIMITED
+	init_beancounter_nolimits(&default_beancounter);
+#else
+	init_beancounter_syslimits(&default_beancounter, mempages);
+#endif
+	init_beancounter_store(&default_beancounter);
+	init_beancounter_struct(&default_beancounter);
+
+	ub_hash_init();
+}
diff -Nurap linux-2.6.9-100.orig/kernel/ub/ub_dcache.c linux-2.6.9-ve023stab054/kernel/ub/ub_dcache.c
--- linux-2.6.9-100.orig/kernel/ub/ub_dcache.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/ub_dcache.c	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,645 @@
+/*
+ *  kernel/ub/ub_dcache.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/dcache.h>
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/kmem_slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+#include <linux/swap.h>
+#include <linux/stop_machine.h>
+#include <linux/cpumask.h>
+#include <linux/nmi.h>
+#include <linux/rwsem.h>
+#include <linux/rcupdate.h>
+#include <asm/bitops.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/ub_dcache.h>
+#include <ub/ub_dcache_op.h>
+
+/*
+ * Locking
+ *                          traverse  dcache_lock  d_lock
+ *        ub_dentry_charge   +         -            +
+ *      ub_dentry_uncharge   +         +            -
+ * ub_dentry_charge_nofail   +         +            -
+ *
+ * d_inuse changes are atomic, with special handling of "not in use" <->
+ * "in use" (-1 <-> 0) transitions.  We have two sources of non-atomicity
+ * here: (1) in many operations we need to change d_inuse of both dentry and
+ * its parent, and (2) on state transitions we need to adjust the account.
+ *
+ * Regarding (1): we do not have (and do not want) a single lock covering all
+ * operations, so in general it's impossible to get a consistent view of
+ * a tree with respect to d_inuse counters (except by swsuspend).  It also
+ * means if a dentry with d_inuse of 0 gets one new in-use child and loses
+ * one, its d_inuse counter will go either the 0 -> 1 -> 0 path or 0 -> -1 -> 0,
+ * and we can't say which way.
+ * Note that path -1 -> 0 -> -1 can't turn into -1 -> -2 -> -1, since
+ * uncharge can be done only after return from charge (with d_genocide being
+ * the only apparent exception).
+ * Regarding (2): there is a similar uncertainty with the dcache account.
+ * If the account is equal to the limit, one more dentry is started to be
+ * used and one is put, the account will either hit the limit (and an error
+ * will be returned), or decrement will happen before increment.
+ *
+ * These races do not really matter.
+ * The only things we want are:
+ *  - if a system is suspended with no in-use dentries, all d_inuse counters
+ *    should be correct (-1);
+ *  - d_inuse counters should always be >= -1.
+ * This holds if ->parent references are accessed and maintained properly.
+ * In subtle moments (like d_move) dentries exchanging their parents should
+ * both be in-use.  At d_genocide time, lookups and charges are assumed to be
+ * impossible.
+ */
+
+/*
+ * Hierarchical accounting
+ * UB argument must NOT be NULL
+ */
+
+static int do_charge_dcache(struct user_beancounter *ub, unsigned long size, 
+		enum ub_severity sv)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (__charge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size), sv))
+		goto out_mem;
+	if (__charge_beancounter_locked(ub, UB_DCACHESIZE, size, sv))
+		goto out_dcache;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return 0;
+
+out_dcache:
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
+out_mem:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return -ENOMEM;
+}
+
+static void do_uncharge_dcache(struct user_beancounter *ub, 
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
+	__uncharge_beancounter_locked(ub, UB_DCACHESIZE, size);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+static int charge_dcache(struct user_beancounter *ub, unsigned long size, 
+		enum ub_severity sv)
+{
+	struct user_beancounter *p, *q;
+
+	for (p = ub; p != NULL; p = p->parent) {
+		if (do_charge_dcache(p, size, sv))
+			goto unroll;
+	}
+	return 0;
+
+unroll:
+	for (q = ub; q != p; q = q->parent)
+		do_uncharge_dcache(q, size);
+	return -ENOMEM;
+}
+
+void uncharge_dcache(struct user_beancounter *ub, unsigned long size)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_uncharge_dcache(ub, size);
+}
+
+/*
+ * Simple helpers to do maintain account and d_ub field.
+ */
+
+static inline int d_charge(struct dentry_beancounter *d_bc)
+{
+	struct user_beancounter *ub;
+
+	ub = get_beancounter(get_exec_ub());
+	if (charge_dcache(ub, d_bc->d_ubsize, UB_SOFT)) {
+		put_beancounter(ub);
+		return -1;
+	}
+	d_bc->d_ub = ub;
+	return 0;
+}
+
+static inline void d_forced_charge(struct dentry_beancounter *d_bc)
+{
+	struct user_beancounter *ub;
+
+	ub = get_beancounter(get_exec_ub());
+	charge_dcache(ub, d_bc->d_ubsize, UB_FORCE);
+	d_bc->d_ub = ub;
+}
+
+/*
+ * Minor helpers
+ */
+
+extern kmem_cache_t *dentry_cache; 
+extern kmem_cache_t *inode_cachep;
+static struct rw_semaphore ub_dentry_alloc_sem;
+
+static inline unsigned int dentry_memusage(void)
+{
+	return kmem_cache_memusage(dentry_cache);
+}
+
+static inline unsigned int inode_memusage(void)
+{
+	return kmem_cache_memusage(inode_cachep);
+}
+
+static inline unsigned long d_charge_size(struct dentry *dentry)
+{
+	/* dentry's d_name is already set to appropriate value (see d_alloc) */
+	return inode_memusage() + dentry_memusage() +
+		(dname_external(dentry) ?
+		 kmem_obj_memusage((void *)dentry->d_name.name) : 0);
+}
+
+/*
+ * Entry points from dcache.c
+ */
+
+/* 
+ * Set initial d_inuse on d_alloc.
+ * Called with no locks, preemption disabled.
+ */
+int __ub_dentry_alloc(struct dentry *dentry)
+{
+	struct dentry_beancounter *d_bc;
+
+	d_bc = dentry_bc(dentry);
+	d_bc->d_ub = get_beancounter(get_exec_ub());
+	atomic_set(&d_bc->d_inuse, 0); /* see comment in ub_dcache.h */
+	d_bc->d_ubsize = d_charge_size(dentry);
+
+	if (charge_dcache(d_bc->d_ub, d_bc->d_ubsize, UB_HARD))
+		goto failure;
+	return 0;
+
+failure:
+	put_beancounter(d_bc->d_ub);
+	d_bc->d_ub = NULL;
+	return -ENOMEM;
+}
+
+void __ub_dentry_alloc_start(void)
+{
+	down_read(&ub_dentry_alloc_sem);
+	task_bc(current)->dentry_alloc = 1;
+}
+
+void __ub_dentry_alloc_end(void)
+{
+	task_bc(current)->dentry_alloc = 0;
+	up_read(&ub_dentry_alloc_sem);
+}
+
+/*
+ * It is assumed that parent is already in use, so traverse upwards is
+ * limited to one ancestor only.
+ * Called under d_lock and rcu_read_lock.
+ */
+int __ub_dentry_charge(struct dentry *dentry)
+{
+	struct dentry_beancounter *d_bc;
+	struct dentry *parent;
+	int ret;
+
+	d_bc = dentry_bc(dentry);
+	if (atomic_inc_and_test(&d_bc->d_inuse)) {
+		/* state transition -1 => 0 */
+		if (d_charge(d_bc))
+			goto failure;
+
+		if (dentry != dentry->d_parent) {
+			parent = dentry->d_parent;
+			if (atomic_inc_and_test(&dentry_bc(parent)->d_inuse))
+				BUG();
+		}
+	}
+	return 0;
+
+failure:
+	/*
+	 * Here we would like to fail the lookup.
+	 * It is not easy: if d_lookup fails, callers expect that a dentry
+	 * with the given name doesn't exist, and create a new one.
+	 * So, first we forcedly charge for this dentry.
+	 * Then try to remove it from cache safely.  If it turns out to be
+	 * possible, we can return error.
+	 */
+	d_forced_charge(d_bc);
+
+	if (dentry != dentry->d_parent) {
+		parent = dentry->d_parent;
+		if (atomic_inc_and_test(&dentry_bc(parent)->d_inuse))
+			BUG();
+	}
+
+	ret = 0;
+	if (spin_trylock(&dcache_lock)) {
+		if (!list_empty(&dentry->d_subdirs)) {
+			spin_unlock(&dentry->d_lock);
+			spin_unlock(&dcache_lock);
+			rcu_read_unlock();
+			shrink_dcache_parent(dentry);
+			rcu_read_lock();
+			spin_lock(&dcache_lock);
+			spin_lock(&dentry->d_lock);
+		}
+		if (atomic_read(&dentry->d_count) == 1) {
+			__d_drop(dentry);
+			ret = -1;
+		}
+		spin_unlock(&dcache_lock);
+	}
+
+	return ret;
+}
+
+/*
+ * Go up in the tree decreasing d_inuse.
+ * Called under dcache_lock.
+ */
+void __ub_dentry_uncharge(struct dentry *dentry)
+{
+	struct dentry_beancounter *d_bc;
+	struct dentry *parent;
+	struct user_beancounter *ub;
+	unsigned long size;
+
+	/* walk up the tree until the state doesn't change or the root is reached */
+	d_bc = dentry_bc(dentry);
+	size = d_bc->d_ubsize;
+	ub = d_bc->d_ub;
+	while (atomic_add_negative(-1, &d_bc->d_inuse)) {
+		/* state transition 0 => -1 */
+		uncharge_dcache(ub, size);
+		put_beancounter(ub);
+
+		parent = dentry->d_parent;
+		if (dentry == parent)
+			break;
+		dentry = parent;
+		d_bc = dentry_bc(dentry);
+		size = d_bc->d_ubsize;
+		ub = d_bc->d_ub;
+	}
+}
+
+/* 
+ * Forced charge for __dget_locked, where API doesn't allow to return error.
+ * Called under dcache_lock.
+ */
+void __ub_dentry_charge_nofail(struct dentry *dentry)
+{
+	struct dentry_beancounter *d_bc;
+	struct dentry *parent;
+
+	d_bc = dentry_bc(dentry);
+	while (atomic_inc_and_test(&d_bc->d_inuse)) {
+		/* state transition -1 => 0 */
+		d_forced_charge(d_bc);
+
+		parent = dentry->d_parent;
+		if (dentry == parent)
+			break;
+		dentry = parent;
+		d_bc = dentry_bc(dentry);
+	}
+}
+
+/*
+ * Adaptive accounting
+ */
+
+int ub_dentry_on;
+int ub_dentry_alloc_barrier;
+EXPORT_SYMBOL(ub_dentry_on);
+
+static DEFINE_PER_CPU(int, checkcnt);
+static unsigned long checklowat = 0;
+static unsigned long checkhiwat = ULONG_MAX;
+
+static int sysctl_ub_dentry_chk = 10;
+#define sysctl_ub_lowat	sysctl_ub_watermark[0]
+#define sysctl_ub_hiwat sysctl_ub_watermark[1]
+static DECLARE_RWSEM(ub_dentry_alloc_sem);
+/* 1024th of lowmem size */
+static unsigned int sysctl_ub_watermark[2] = {0, 100};
+
+
+static int ub_dentry_acctinit(struct dentry *dentry)
+{
+	struct dentry_beancounter *d_bc;
+
+	d_bc = dentry_bc(dentry);
+	d_bc->d_ub = NULL;
+	atomic_set(&d_bc->d_inuse, -1);
+	if (dname_external(dentry)) {
+		struct page *page;
+		page = virt_to_page(dentry->d_name.name);
+		if (!PageSlab(page) || GET_PAGE_CACHE(page) == NULL) {
+			printk("Problem with name, dentry %p, parent %p, "
+					"name %p len %d\n",
+					dentry, dentry->d_parent,
+					dentry->d_name.name,
+					dentry->d_name.len);
+			printk("   de %p name %.10s\n",
+					dentry, dentry->d_name.name);
+			d_bc->d_ubsize = 0;
+			return 0;
+		}
+	}
+	d_bc->d_ubsize = d_charge_size(dentry);
+	return 0;
+}
+
+static int ub_dentry_acctcount(struct dentry *dentry)
+{
+	struct dentry_beancounter *d_bc;
+	struct dentry *child;
+	int count;
+
+	count = 0;
+	list_for_each_entry(child, &dentry->d_subdirs, d_child)
+		count++;
+
+	d_bc = dentry_bc(dentry);
+	count = atomic_read(&dentry->d_count) - count;
+	if (count) {
+		__ub_dentry_charge_nofail(dentry);
+		if (count > 1)
+			atomic_add(count - 1, &d_bc->d_inuse);
+	}
+
+	return 0;
+}
+
+static int ub_dentry_acctdrop(struct dentry *dentry)
+{
+	struct dentry_beancounter *d_bc;
+
+	d_bc = dentry_bc(dentry);
+	if (atomic_read(&d_bc->d_inuse) < 0)
+		return 0;
+	atomic_set(&d_bc->d_inuse, -1);
+	uncharge_dcache(d_bc->d_ub, d_bc->d_ubsize);
+	put_beancounter(d_bc->d_ub);
+	return 0;
+}
+
+extern void kmem_cache_free_block(kmem_cache_t *cachep, void **objpp,
+		int nr_objects);
+static int ub_dentry_walk(int (*fun)(struct dentry *))
+{
+	kmem_cache_t *cachep;
+	struct array_cache *ac;
+	struct slab *slabp;
+	char *objp;
+	int cpu, i, sz, r;
+	unsigned long map[PAGE_SIZE / sizeof(struct dentry)
+					/ BITS_PER_LONG + 1];
+
+	cachep = dentry_cache;
+	if (cachep->num >= sizeof(map) * 8)
+		return -E2BIG;
+
+	/* drain all CPU caches to have up-to-date free map */
+	ac = list3_data(cachep)->shared;
+	kmem_cache_free_block(cachep, (void **)(ac + 1), ac->avail);
+	ac->avail = 0;
+	for_each_cpu(cpu) {
+		ac = cachep->array[cpu];
+		kmem_cache_free_block(cachep, (void **)(ac + 1), ac->avail);
+		ac->avail = 0;
+	}
+
+	list_for_each_entry(slabp, &list3_data(cachep)->slabs_full, list) {
+		touch_nmi_watchdog();
+		for (i = 0, objp = slabp->s_mem;
+		     i < cachep->num;
+		     i++, objp += cachep->objsize) {
+#if SLAB_DEBUG
+			r = (*fun)((struct dentry *)(objp + cachep->dbghead));
+#else
+			r = (*fun)((struct dentry *)objp);
+#endif
+			if (r)
+				return r;
+		}
+	}
+
+	list_for_each_entry(slabp, &list3_data(cachep)->slabs_partial, list) {
+		touch_nmi_watchdog();
+		memset(map, 0xff, sizeof(map));
+		for (i = slabp->free, r = 0;
+		     i != BUFCTL_END;
+		     i = slab_bufctl(slabp)[i], r++) {
+			if (r > cachep->num)
+				return -1;
+			__clear_bit(i, map);
+		}
+		sz = sizeof(map) * 8; /* bitmap size in bits, matches E2BIG check */
+		for (i = find_first_bit(map, sz);
+		     i < cachep->num;
+		     i = find_next_bit(map, sz, i + 1)) {
+			objp = slabp->s_mem + i * cachep->objsize;
+#if SLAB_DEBUG
+			r = (*fun)((struct dentry *)(objp + cachep->dbghead));
+#else
+			r = (*fun)((struct dentry *)objp);
+#endif
+			if (r)
+				return r;
+		}
+	}
+
+	return 0;
+}
+
+static int ub_dentry_accton(void *data)
+{
+	struct user_beancounter *ub;
+	int err;
+
+	ub = get_exec_ub();
+	set_exec_ub(get_ub0());
+	err = ub_dentry_walk(&ub_dentry_acctinit);
+	if (!err)
+		err = ub_dentry_walk(&ub_dentry_acctcount);
+	set_exec_ub(ub);
+	if (err == 0)
+		ub_dentry_on = 1;
+	return err;
+}
+
+static int ub_dentry_acctoff(void *data)
+{
+	int ret;
+	ret = ub_dentry_walk(&ub_dentry_acctdrop);
+	if (ret == 0)
+		ub_dentry_on = 0;
+	return ret;
+}
+
+/*
+ * Main function turning dcache accounting on and off.
+ * Called with preemption disabled (for caller's convenience).
+ */
+static void ub_dentry_switch(int onoff, unsigned long pages, int (*fun)(void *))
+{
+	static char *s[] = { "off", "on" };
+	unsigned long start_jiffies;
+	int err, tm;
+
+	start_jiffies = jiffies;
+	preempt_enable();
+	ub_dentry_alloc_barrier = 1;
+	/* ensure ub_dentry_alloc_barrier is visible on all CPUs */
+	mb();  synchronize_kernel();
+	down_write(&ub_dentry_alloc_sem);
+	if (ub_dentry_on == onoff)
+		goto done;
+
+	printk("UBC: preparing to turn dcache accounting %s, "
+			"size %lu pages, watermarks %lu %lu\n",
+			s[onoff], pages, checklowat, checkhiwat);
+	err = stop_machine_run(fun, NULL, NR_CPUS);
+	if (err) {
+		printk(KERN_ERR "UBC: ERROR: dcache accounting switch %d\n",
+				err);
+		preempt_disable();
+		checklowat = 0;
+		checkhiwat = ULONG_MAX;
+		sysctl_ub_dentry_chk = INT_MAX;
+		preempt_enable();
+	} else {
+		tm = jiffies_to_msecs(jiffies - start_jiffies);
+		printk("UBC: turning dcache accounting %s succeeded, "
+				"usage %lu, time %u.%03u\n",
+				s[onoff],
+				get_ub0()->ub_parms[UB_DCACHESIZE].held,
+				tm / 1000, tm % 1000);
+	}
+
+done:
+	ub_dentry_alloc_barrier = 0;
+	up_write(&ub_dentry_alloc_sem);
+	preempt_disable();
+}
+
+void ub_dentry_checkup(void)
+{
+	int *p;
+	unsigned long pages;
+
+	preempt_disable();
+	p = &__get_cpu_var(checkcnt);
+	if (++*p > sysctl_ub_dentry_chk) {
+		*p = 0;
+		pages = dentry_cache->grown
+			- dentry_cache->reaped
+			- dentry_cache->shrunk;
+		pages <<= dentry_cache->gfporder;
+		if (ub_dentry_on) {
+			if (pages < checklowat)
+				ub_dentry_switch(0, pages, &ub_dentry_acctoff);
+		} else {
+			if (pages >= checkhiwat)
+				ub_dentry_switch(1, pages, &ub_dentry_accton);
+		}
+	}
+	preempt_enable();
+}
+
+static void ub_dentry_set_limits(unsigned long pages, unsigned long cap)
+{
+	down_write(&ub_dentry_alloc_sem);
+	preempt_disable();
+	checklowat = (pages >> 10) * sysctl_ub_lowat;
+	checkhiwat = (pages >> 10) * sysctl_ub_hiwat;
+	if (checkhiwat > cap) {
+		checkhiwat = cap;
+		checklowat = cap / sysctl_ub_hiwat * sysctl_ub_lowat;
+	}
+	preempt_enable();
+	up_write(&ub_dentry_alloc_sem);
+}
+
+static int ub_dentry_proc_handler(ctl_table *ctl, int write, struct file *filp,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int r;
+
+	r = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+	if (!r && write)
+		ub_dentry_set_limits(totalram_pages - totalhigh_pages,
+				ULONG_MAX);
+	return r;
+}
+
+static ctl_table ub_dentry_sysctl_table[] = {
+	{
+		.ctl_name	= 1000,
+		.procname	= "dentry_check",
+		.data		= &sysctl_ub_dentry_chk,
+		.maxlen		= sizeof(sysctl_ub_dentry_chk),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= 1001,
+		.procname	= "dentry_watermark",
+		.data		= &sysctl_ub_lowat,
+		.maxlen		= sizeof(sysctl_ub_lowat) * 2,
+		.mode		= 0644,
+		.proc_handler	= &ub_dentry_proc_handler,
+	},
+	{ .ctl_name = 0 }
+};
+static ctl_table ub_dentry_sysctl_root[] = {
+	{
+		.ctl_name	= 23681,
+		.procname	= "ubc",
+		.mode		= 0555,
+		.child		= ub_dentry_sysctl_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+static int __init ub_dentry_init(void)
+{
+	/*
+	 * Initial watermarks are limited, to limit walk time.
+	 * 384MB translates into 0.8 sec on PIII 866MHz.
+	 */
+	ub_dentry_set_limits(totalram_pages - totalhigh_pages,
+			384 * 1024 * 1024 / PAGE_SIZE);
+	if (register_sysctl_table(ub_dentry_sysctl_root, 0) == NULL)
+		return -ENOMEM;
+	return 0;
+}
+__initcall(ub_dentry_init);
diff -Nurap linux-2.6.9-100.orig/kernel/ub/ub_mem.c linux-2.6.9-ve023stab054/kernel/ub/ub_mem.c
--- linux-2.6.9-100.orig/kernel/ub/ub_mem.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/ub_mem.c	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,468 @@
+/*
+ *  kernel/ub/ub_mem.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/kmem_slab.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/swap.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/ub_hash.h>
+
+/*
+ * Initialization
+ */
+
+extern void __init page_beancounters_init(void);
+
+void __init page_ubc_init(void)
+{
+#ifdef CONFIG_USER_RSS_ACCOUNTING
+	page_beancounters_init();
+#endif
+}
+
+/*
+ * Slab accounting
+ */
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+
+#define CC_HASH_SIZE	1024
+static struct ub_cache_counter *cc_hash[CC_HASH_SIZE];
+spinlock_t cc_lock;
+
+static void __free_cache_counters(struct user_beancounter *ub,
+		kmem_cache_t *cachep)
+{
+	struct ub_cache_counter *cc, **pprev, *del;
+	int i;
+	unsigned long flags;
+
+	del = NULL;
+	spin_lock_irqsave(&cc_lock, flags);
+	for (i = 0; i < CC_HASH_SIZE; i++) {
+		pprev = &cc_hash[i];
+		cc = cc_hash[i];
+		while (cc != NULL) {
+			if (cc->ub != ub && cc->cachep != cachep) {
+				pprev = &cc->next;
+				cc = cc->next;
+				continue;
+			}
+
+			list_del(&cc->ulist);
+			*pprev = cc->next;
+			cc->next = del;
+			del = cc;
+			cc = *pprev;
+		}
+	}
+	spin_unlock_irqrestore(&cc_lock, flags);
+
+	while (del != NULL) {
+		cc = del->next;
+		kfree(del);
+		del = cc;
+	}
+}
+
+void ub_free_counters(struct user_beancounter *ub)
+{
+	__free_cache_counters(ub, NULL);
+}
+
+void ub_kmemcache_free(kmem_cache_t *cachep)
+{
+	__free_cache_counters(NULL, cachep);
+}
+
+void __init init_cache_counters(void)
+{
+	memset(cc_hash, 0, CC_HASH_SIZE * sizeof(cc_hash[0]));
+	spin_lock_init(&cc_lock);
+}
+
+#define cc_hash_fun(ub, cachep)	(				\
+	(((unsigned long)(ub) >> L1_CACHE_SHIFT) ^		\
+	 ((unsigned long)(ub) >> (BITS_PER_LONG / 2)) ^		\
+	 ((unsigned long)(cachep) >> L1_CACHE_SHIFT) ^		\
+	 ((unsigned long)(cachep) >> (BITS_PER_LONG / 2))	\
+	) & (CC_HASH_SIZE - 1))
+
+static int change_slab_charged(struct user_beancounter *ub, void *objp,
+		unsigned long val)
+{
+	struct ub_cache_counter *cc, *new_cnt, **pprev;
+	kmem_cache_t *cachep;
+	unsigned long flags;
+
+	cachep = GET_PAGE_CACHE(virt_to_page(objp));
+	new_cnt = NULL;
+
+again:
+	spin_lock_irqsave(&cc_lock, flags);
+	cc = cc_hash[cc_hash_fun(ub, cachep)];
+	while (cc) {
+		if (cc->ub == ub && cc->cachep == cachep)
+			goto found;
+		cc = cc->next;
+	}
+
+	if (new_cnt != NULL)
+		goto insert;
+
+	spin_unlock_irqrestore(&cc_lock, flags);
+
+	new_cnt = kmalloc(sizeof(*new_cnt), GFP_ATOMIC);
+	if (new_cnt == NULL)
+		return -ENOMEM;
+
+	new_cnt->counter = 0;
+	new_cnt->ub = ub;
+	new_cnt->cachep = cachep;
+	goto again;
+
+insert:
+	pprev = &cc_hash[cc_hash_fun(ub, cachep)];
+	new_cnt->next = *pprev;
+	*pprev = new_cnt;
+	list_add(&new_cnt->ulist, &ub->ub_cclist);
+	cc = new_cnt;
+	new_cnt = NULL;
+
+found:
+	cc->counter += val;
+	spin_unlock_irqrestore(&cc_lock, flags);
+	if (new_cnt)
+		kfree(new_cnt);
+	return 0;
+}
+
+static inline int inc_slab_charged(struct user_beancounter *ub, void *objp)
+{
+	return change_slab_charged(ub, objp, 1);
+}
+
+static inline void dec_slab_charged(struct user_beancounter *ub, void *objp)
+{
+	if (change_slab_charged(ub, objp, -1) < 0) /* 3 args, as declared above */
+		BUG();
+}
+
+#include <linux/vmalloc.h>
+
+static inline int inc_pages_charged(struct user_beancounter *ub,
+		struct page *pg, int order)
+{
+	int cpu;
+
+	cpu = get_cpu();
+	ub->ub_pages_charged[cpu]++;
+	put_cpu();
+	return 0;
+}
+
+static inline void dec_pages_charged(struct user_beancounter *ub,
+		struct page *pg, int order)
+{
+	int cpu;
+
+	cpu = get_cpu();
+	ub->ub_pages_charged[cpu]--;
+	put_cpu();
+}
+
+void inc_vmalloc_charged(struct vm_struct *vm, int flags)
+{
+	int cpu;
+	struct user_beancounter *ub;
+
+	if (!(flags & __GFP_UBC))
+		return;
+
+	ub = get_exec_ub();
+	if (ub == NULL)
+		return;
+
+	cpu = get_cpu();
+	ub->ub_vmalloc_charged[cpu] += vm->nr_pages;
+	put_cpu();
+}
+
+void dec_vmalloc_charged(struct vm_struct *vm)
+{
+	int cpu;
+	struct user_beancounter *ub;
+
+	ub = page_ub(vm->pages[0]);
+	if (ub == NULL)
+		return;
+
+	cpu = get_cpu();
+	ub->ub_vmalloc_charged[cpu] -= vm->nr_pages;
+	put_cpu();
+}
+
+#else
+#define inc_slab_charged(ub, o)		(0)
+#define dec_slab_charged(ub, o)		do { } while (0)
+#define inc_pages_charged(ub, pg, o) 	(0)
+#define dec_pages_charged(ub, pg, o)	do { } while (0)
+#endif
+
+static inline struct user_beancounter **slab_ub_ref(void *objp)
+{
+	struct page *pg;
+	kmem_cache_t *cachep;
+	struct slab *slabp;
+	int objnr;
+
+	pg = virt_to_page(objp);
+	cachep = GET_PAGE_CACHE(pg);
+	BUG_ON(!(cachep->flags & SLAB_UBC));
+	slabp = GET_PAGE_SLAB(pg);
+	objnr = (objp - slabp->s_mem) / cachep->objsize;
+	return slab_ubcs(cachep, slabp) + objnr;
+}
+
+struct user_beancounter *slab_ub(void *objp)
+{
+	struct user_beancounter **ub_ref;
+
+	ub_ref = slab_ub_ref(objp);
+	return *ub_ref;
+}
+
+EXPORT_SYMBOL(slab_ub);
+
+#define UB_KMEM_QUANT	(PAGE_SIZE * 4)
+
+/* called with IRQ disabled */
+static int ub_kmemsize_charge(struct user_beancounter *ub,
+		unsigned long size,
+		enum ub_severity strict)
+{
+	struct task_beancounter *tbc;
+
+	tbc = task_bc(current);
+	if (ub != tbc->task_ub || size > UB_KMEM_QUANT)
+		goto just_charge;
+	if (tbc->kmem_precharged >= size) {
+		tbc->kmem_precharged -= size;
+		return 0;
+	}
+
+	if (charge_beancounter(ub, UB_KMEMSIZE, UB_KMEM_QUANT, UB_HARD) == 0) {
+		tbc->kmem_precharged += UB_KMEM_QUANT - size;
+		return 0;
+	}
+
+just_charge:
+	return charge_beancounter(ub, UB_KMEMSIZE, size, strict);
+}
+
+/* called with IRQ disabled */
+static void ub_kmemsize_uncharge(struct user_beancounter *ub,
+		unsigned long size)
+{
+	struct task_beancounter *tbc;
+
+	if (size > UB_MAXVALUE) {
+		printk("ub_kmemsize_uncharge: size %lu\n", size);
+		dump_stack();
+	}
+
+	tbc = task_bc(current);
+	if (ub != tbc->task_ub)
+		goto just_uncharge;
+
+	tbc->kmem_precharged += size;
+	if (tbc->kmem_precharged < UB_KMEM_QUANT * 2)
+		return;
+	size = tbc->kmem_precharged - UB_KMEM_QUANT;
+	tbc->kmem_precharged -= size;
+
+just_uncharge:
+	uncharge_beancounter(ub, UB_KMEMSIZE, size);
+}
+
+/* called with IRQ disabled */
+int ub_slab_charge(void *objp, int flags)
+{
+	unsigned int size;
+	struct user_beancounter *ub;
+
+	ub = get_beancounter(get_exec_ub());
+	if (ub == NULL)
+		return 0;
+
+	size = CHARGE_SIZE(kmem_obj_memusage(objp));
+	if (ub_kmemsize_charge(ub, size,
+				(flags & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
+		goto out_err;
+
+	if (inc_slab_charged(ub, objp) < 0) {
+		ub_kmemsize_uncharge(ub, size);
+		goto out_err;
+	}
+	*slab_ub_ref(objp) = ub;
+	return 0;
+
+out_err:
+	put_beancounter(ub);
+	return -ENOMEM;
+}
+
+/* called with IRQ disabled */
+void ub_slab_uncharge(void *objp)
+{
+	unsigned int size;
+	struct user_beancounter **ub_ref;
+
+	ub_ref = slab_ub_ref(objp);
+	if (*ub_ref == NULL)
+		return;
+
+	dec_slab_charged(*ub_ref, objp);
+	size = CHARGE_SIZE(kmem_obj_memusage(objp));
+	ub_kmemsize_uncharge(*ub_ref, size);
+	put_beancounter(*ub_ref);
+	*ub_ref = NULL;
+}
+
+static void ub_bad_page(struct page *page, struct user_beancounter *ub,
+		char *str)
+{
+	page_ub(page) = NULL;
+	printk(KERN_ERR "UBC: BUG: fix page state for %p: %s\n", page, str);
+	printk(KERN_ERR "  flags: 0x%0*lx mapping: %p mapcount: %d count: %d\n",
+		(int)(2*sizeof(page_flags_t)), (unsigned long)page->flags,
+		page->mapping, page_mapcount(page), page_count(page));
+	printk(KERN_ERR "  UB %p mg %lu id %d\n", ub, ub->ub_magic, ub->ub_uid);
+	dump_stack();
+}
+
+static inline void ub_check_null(struct page *page)
+{
+	struct user_beancounter *ub;
+
+	ub = page_ub(page);
+	if (unlikely(ub != NULL))
+		ub_bad_page(page, ub, "ub is not NULL");
+}
+
+static inline int ub_check_magic(struct page *pg, struct user_beancounter *ub)
+{
+	if (likely(ub->ub_magic == UB_MAGIC))
+		return 0;
+
+	ub_bad_page(pg, ub, "ub magic is bad");
+	return -1;
+}
+
+/*
+ * Pages accounting
+ */
+
+int ub_page_charge(struct page *page, int order, int mask)
+{
+	struct user_beancounter *ub;
+	unsigned long flags;
+
+	ub = NULL;
+	if (!(mask & __GFP_UBC))
+		goto out;
+
+	ub = get_beancounter(get_exec_ub());
+	if (ub == NULL)
+		goto out;
+
+	local_irq_save(flags);
+	if (ub_kmemsize_charge(ub, CHARGE_ORDER(order),
+				(mask & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
+		goto err;
+	if (inc_pages_charged(ub, page, order) < 0) {
+		ub_kmemsize_uncharge(ub, CHARGE_ORDER(order));
+		goto err;
+	}
+	local_irq_restore(flags);
+out:
+	ub_check_null(page);
+	page_ub(page) = ub;
+	return 0;
+
+err:
+	local_irq_restore(flags);
+	ub_check_null(page);
+	put_beancounter(ub);
+	return -ENOMEM;
+}
+
+void ub_page_uncharge(struct page *page, int order)
+{
+	struct user_beancounter *ub;
+	unsigned long flags;
+
+	ub = page_ub(page);
+	if (ub == NULL)
+		return;
+
+	if (ub_check_magic(page, ub))
+		return;
+
+	dec_pages_charged(ub, page, order);
+	local_irq_save(flags);
+	ub_kmemsize_uncharge(ub, CHARGE_ORDER(order));
+	local_irq_restore(flags);
+	put_beancounter(ub);
+	page_ub(page) = NULL;
+}
+
+/* 
+ * takes init_mm.page_table_lock 
+ * some outer lock to protect pages from vmalloced area must be held
+ */
+struct user_beancounter *vmalloc_ub(void *obj)
+{
+	struct page *pg;
+
+	spin_lock(&init_mm.page_table_lock);
+	pg = follow_page_k((unsigned long)obj, 0);
+	spin_unlock(&init_mm.page_table_lock);
+	if (pg == NULL)
+		return NULL;
+
+	return page_ub(pg);
+}
+
+EXPORT_SYMBOL(vmalloc_ub);
+
+struct user_beancounter *mem_ub(void *obj)
+{
+	struct user_beancounter *ub;
+
+	if ((unsigned long)obj >= VMALLOC_START &&
+	    (unsigned long)obj  < VMALLOC_END)
+		ub = vmalloc_ub(obj);
+	else
+		ub = slab_ub(obj);
+
+	return ub;
+}
+
+EXPORT_SYMBOL(mem_ub);
diff -Nurap linux-2.6.9-100.orig/kernel/ub/ub_misc.c linux-2.6.9-ve023stab054/kernel/ub/ub_misc.c
--- linux-2.6.9-100.orig/kernel/ub/ub_misc.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/ub_misc.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,434 @@
+/*
+ *  kernel/ub/ub_misc.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+
+#define UB_FILE_MINQUANT	3
+#define UB_FILE_MAXQUANT	10
+#define UB_FILE_INIQUANT	4
+
+static unsigned long ub_file_precharge(struct task_beancounter *task_bc,
+		struct user_beancounter *ub, unsigned long *kmemsize);
+
+static inline unsigned long ub_file_kmemsize(unsigned long nr)
+{
+	return CHARGE_SIZE(kmem_cache_memusage(filp_cachep)) * nr;
+}
+
+/*
+ * Task stuff
+ */
+
+static void init_task_sub(struct task_struct *parent,
+		struct task_struct *tsk,
+		struct task_beancounter *old_bc)
+{
+	struct task_beancounter *new_bc;
+	struct user_beancounter *sub;
+
+	new_bc = task_bc(tsk);
+	sub = old_bc->fork_sub;
+	new_bc->fork_sub = get_beancounter(sub);
+	new_bc->task_fnode = NULL;
+	new_bc->task_freserv = old_bc->task_freserv;
+	old_bc->task_freserv = NULL;
+	memset(&new_bc->task_data, 0, sizeof(new_bc->task_data));
+	new_bc->pgfault_handle = 0;
+	new_bc->pgfault_allot = 0;
+}
+
+void ub_init_task_bc(struct task_beancounter *tbc)
+{
+	tbc->file_precharged = 0;
+	tbc->file_quant = UB_FILE_INIQUANT;
+	tbc->file_count = 0;
+
+	tbc->kmem_precharged = 0;
+	tbc->dentry_alloc = 0;
+}
+
+int ub_task_charge(struct task_struct *parent, struct task_struct *task)
+{
+	struct task_beancounter *old_bc;
+	struct task_beancounter *new_bc;
+	struct user_beancounter *ub, *pub;
+	unsigned long file_nr, kmemsize;
+	unsigned long flags;
+
+	old_bc = task_bc(parent);
+	ub = old_bc->fork_sub;
+	new_bc = task_bc(task);
+	new_bc->task_ub = get_beancounter(ub);
+	new_bc->exec_ub = get_beancounter(ub);
+
+	for (pub = ub; pub->parent != NULL; pub = pub->parent);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+	if (unlikely(__charge_beancounter_locked(pub, UB_NUMPROC,
+					1, UB_HARD) < 0))
+		goto out_numproc;
+
+	ub_init_task_bc(new_bc);
+	file_nr = ub_file_precharge(new_bc, pub, &kmemsize);
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+
+	charge_beancounter_notop(ub, UB_NUMPROC, 1);
+	if (likely(file_nr)) {
+		charge_beancounter_notop(ub, UB_NUMFILE, file_nr);
+		charge_beancounter_notop(ub, UB_KMEMSIZE, kmemsize);
+	}
+
+	init_task_sub(parent, task, old_bc);
+	return 0;
+
+out_numproc:
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+	__put_beancounter_batch(ub, 2);
+	return -ENOMEM;
+}
+
+extern atomic_t dbgpre;
+
+void ub_task_uncharge(struct task_struct *task)
+{
+	struct task_beancounter *task_bc;
+	struct user_beancounter *pub;
+	unsigned long file_nr, kmemsize;
+	unsigned long flags;
+
+	task_bc = task_bc(task);
+	for (pub = task_bc->task_ub; pub->parent != NULL;
+						pub = pub->parent);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+	__uncharge_beancounter_locked(pub, UB_NUMPROC, 1);
+	file_nr = task_bc->file_precharged;
+	if (likely(file_nr))
+		__uncharge_beancounter_locked(pub,
+				UB_NUMFILE, file_nr);
+
+	/* see comment in ub_file_charge */
+	task_bc->file_precharged = 0;
+	kmemsize = task_bc->kmem_precharged;
+	task_bc->kmem_precharged = 0;
+	kmemsize += ub_file_kmemsize(file_nr);
+	if (likely(kmemsize))
+		__uncharge_beancounter_locked(pub,
+				UB_KMEMSIZE, kmemsize);
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+
+	uncharge_beancounter_notop(task_bc->task_ub, UB_NUMPROC, 1);
+	if (likely(file_nr)) {
+		uncharge_beancounter_notop(task_bc->task_ub,
+				UB_NUMFILE, file_nr);
+		__put_beancounter_batch(task_bc->task_ub, file_nr);
+	}
+	if (likely(kmemsize))
+		uncharge_beancounter_notop(task_bc->task_ub,
+				UB_KMEMSIZE, kmemsize);
+
+	put_beancounter(task_bc->exec_ub);
+	put_beancounter(task_bc->task_ub);
+	put_beancounter(task_bc->fork_sub);
+	/* can't be freed elsewhere, failures possible in the middle of fork */
+	if (task_bc->task_freserv != NULL)
+		kfree(task_bc->task_freserv);
+
+	task_bc->exec_ub = (struct user_beancounter *)0xdeadbcbc;
+	task_bc->task_ub = (struct user_beancounter *)0xdead100c;
+}
+
+/*
+ * Files and file locks.
+ */
+/*
+ * For NUMFILE, we do not take a lock and call charge function
+ * for every file.  We try to charge in batches, keeping local reserve on
+ * task.  For experimental purposes, batch size is adaptive and depends
+ * on numfile barrier, number of processes, and the history of successes and
+ * failures of batch charges.
+ *
+ * Per-task fields have the following meaning
+ *   file_precharged    number of files charged to beancounter in advance,
+ *   file_quant         logarithm of batch size
+ *   file_count         counter of charge successes, to reduce batch size
+ *                      fluctuations.
+ */
+static unsigned long ub_file_precharge(struct task_beancounter *task_bc,
+		struct user_beancounter *ub, unsigned long *kmemsize)
+{
+	unsigned long n, kmem;
+
+	n = 1UL << task_bc->file_quant;
+	if (ub->ub_parms[UB_NUMPROC].held >
+			(ub->ub_parms[UB_NUMFILE].barrier >>
+						task_bc->file_quant))
+		goto nopre;
+	if (unlikely(__charge_beancounter_locked(ub, UB_NUMFILE, n, UB_HARD)))
+		goto nopre;
+	kmem = ub_file_kmemsize(n);
+	if (unlikely(__charge_beancounter_locked(ub, UB_KMEMSIZE,
+					kmem, UB_HARD)))
+		goto nopre_kmem;
+
+	task_bc->file_precharged += n;
+	get_beancounter_batch(task_bc->task_ub, n);
+	task_bc->file_count++;
+	if (task_bc->file_quant < UB_FILE_MAXQUANT &&
+	    task_bc->file_count >= task_bc->file_quant) {
+		task_bc->file_quant++;
+		task_bc->file_count = 0;
+	}
+	*kmemsize = kmem;
+	return n;
+
+nopre_kmem:
+	__uncharge_beancounter_locked(ub, UB_NUMFILE, n);
+nopre:
+	if (task_bc->file_quant > UB_FILE_MINQUANT)
+		task_bc->file_quant--;
+	task_bc->file_count = 0;
+	return 0;
+}
+
+int ub_file_charge(struct file *f)
+{
+	struct user_beancounter *ub, *pub;
+	struct task_beancounter *task_bc;
+	unsigned long file_nr, kmem;
+	unsigned long flags;
+	int err;
+
+	ub = get_exec_ub();
+	task_bc = task_bc(current);
+	if (unlikely(ub != task_bc->task_ub))
+		goto just_charge;
+
+	if (likely(task_bc->file_precharged > 0)) {
+		/*
+		 * files are put via RCU in 2.6.16 so during
+		 * this decrement an IRQ can happen and called
+		 * ub_files_uncharge() will mess file_precharged
+		 *
+		 * ub_task_uncharge() is called via RCU also so no
+		 * protection is needed there
+		 *
+		 * Xemul
+		 */
+
+		local_irq_save(flags);
+		task_bc->file_precharged--;
+		local_irq_restore(flags);
+
+		f->f_ub = ub;
+		return 0;
+	}
+
+	for (pub = ub; pub->parent != NULL; pub = pub->parent);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+	file_nr = ub_file_precharge(task_bc, pub, &kmem);
+	if (unlikely(!file_nr))
+		goto last_try;
+	spin_unlock(&pub->ub_lock);
+	task_bc->file_precharged--;
+	local_irq_restore(flags);
+
+	charge_beancounter_notop(ub, UB_NUMFILE, file_nr);
+	charge_beancounter_notop(ub, UB_KMEMSIZE, kmem);
+	f->f_ub = ub;
+	return 0;
+
+just_charge:
+	for (pub = ub; pub->parent != NULL; pub = pub->parent);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+last_try:
+	kmem = ub_file_kmemsize(1);
+	err = __charge_beancounter_locked(pub, UB_NUMFILE, 1, UB_HARD);
+	if (likely(!err)) {
+		err = __charge_beancounter_locked(pub, UB_KMEMSIZE,
+				kmem, UB_HARD);
+		if (unlikely(err))
+			__uncharge_beancounter_locked(pub, UB_NUMFILE, 1);
+	}
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+	if (likely(!err)) {
+		charge_beancounter_notop(ub, UB_NUMFILE, 1);
+		charge_beancounter_notop(ub, UB_KMEMSIZE, kmem);
+		f->f_ub = get_beancounter(ub);
+	}
+	return err;
+}
+
+static inline int task_precharge_farnr(struct task_beancounter *task_bc)
+{
+	return (task_bc->file_precharged < (1UL << task_bc->file_quant));
+}
+
+void ub_file_uncharge(struct file *f)
+{
+	struct user_beancounter *ub, *pub;
+	struct task_beancounter *task_bc;
+	long nr;
+
+	ub = f->f_ub;
+	task_bc = task_bc(current);
+	if (likely(ub == task_bc->task_ub)) {
+		task_bc->file_precharged++;
+		pub = top_beancounter(ub);
+		if (task_precharge_farnr(task_bc) &&
+			ub_barrier_farnr(pub, UB_NUMFILE) &&
+				ub_barrier_farsz(pub, UB_KMEMSIZE))
+			return;
+		nr = task_bc->file_precharged
+			- (1UL << (task_bc->file_quant - 1));
+		if (nr > 0) {
+			task_bc->file_precharged -= nr;
+			__put_beancounter_batch(ub, nr);
+			uncharge_beancounter(ub, UB_NUMFILE, nr);
+			uncharge_beancounter(ub, UB_KMEMSIZE,
+						ub_file_kmemsize(nr));
+		}
+	} else {
+		uncharge_beancounter(ub, UB_NUMFILE, 1);
+		uncharge_beancounter(ub, UB_KMEMSIZE, ub_file_kmemsize(1));
+		put_beancounter(ub);
+	}
+}
+
+int ub_flock_charge(struct file_lock *fl, int hard)
+{
+	struct user_beancounter *ub;
+	int err;
+
+	/* No need to get_beancounter here since it's already got in slab */
+	ub = slab_ub(fl);
+	if (ub == NULL)
+		return 0;
+
+	err = charge_beancounter(ub, UB_NUMFLOCK, 1, hard ? UB_HARD : UB_SOFT);
+	if (!err)
+		fl->fl_charged = 1;
+	return err;
+}
+
+void ub_flock_uncharge(struct file_lock *fl)
+{
+	struct user_beancounter *ub;
+
+	/* Ub will be put in slab */
+	ub = slab_ub(fl);
+	if (ub == NULL || !fl->fl_charged)
+		return;
+
+	uncharge_beancounter(ub, UB_NUMFLOCK, 1);
+	fl->fl_charged = 0;
+}
+
+/*
+ * Signal handling
+ */
+
+static int do_ub_siginfo_charge(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (__charge_beancounter_locked(ub, UB_KMEMSIZE, size, UB_HARD))
+		goto out_kmem;
+
+	if (__charge_beancounter_locked(ub, UB_NUMSIGINFO, 1, UB_HARD))
+		goto out_num;
+
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return 0;
+
+out_num:
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
+out_kmem:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return -ENOMEM;
+}
+
+static void do_ub_siginfo_uncharge(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
+	__uncharge_beancounter_locked(ub, UB_NUMSIGINFO, 1);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+int ub_siginfo_charge(struct user_beancounter *ub, unsigned long size)
+{
+	struct user_beancounter *p, *q;
+
+	size = CHARGE_SIZE(size);
+	for (p = ub; p != NULL; p = p->parent) {
+		if (do_ub_siginfo_charge(p, size))
+			goto unroll;
+	}
+	return 0;
+
+unroll:
+	for (q = ub; q != p; q = q->parent)
+		do_ub_siginfo_uncharge(q, size);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(ub_siginfo_charge);
+
+void ub_siginfo_uncharge(struct user_beancounter *ub, unsigned long size)
+{
+	size = CHARGE_SIZE(size);
+	for (; ub != NULL; ub = ub->parent)
+		do_ub_siginfo_uncharge(ub, size);
+}
+
+/*
+ * PTYs
+ */
+
+int ub_pty_charge(struct tty_struct *tty)
+{
+	struct user_beancounter *ub;
+	int retval;
+
+	ub = tty_ub(tty);
+	retval = 0;
+	if (ub && tty->driver->subtype == PTY_TYPE_MASTER &&
+			!test_bit(TTY_CHARGED, &tty->flags)) {
+		retval = charge_beancounter(ub, UB_NUMPTY, 1, UB_HARD);
+		if (!retval)
+			set_bit(TTY_CHARGED, &tty->flags);
+	}
+	return retval;
+}
+
+void ub_pty_uncharge(struct tty_struct *tty)
+{
+	struct user_beancounter *ub;
+
+	ub = tty_ub(tty);
+	if (ub && tty->driver->subtype == PTY_TYPE_MASTER &&
+			test_bit(TTY_CHARGED, &tty->flags)) {
+		uncharge_beancounter(ub, UB_NUMPTY, 1);
+		clear_bit(TTY_CHARGED, &tty->flags);
+	}
+}
diff -Nurap linux-2.6.9-100.orig/kernel/ub/ub_net.c linux-2.6.9-ve023stab054/kernel/ub/ub_net.c
--- linux-2.6.9-100.orig/kernel/ub/ub_net.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/ub_net.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,1143 @@
+/*
+ *  linux/kernel/ub/ub_net.c
+ *
+ *  Copyright (C) 1998-2004  Andrey V. Savochkin <saw@saw.sw.com.sg>
+ *  Copyright (C) 2005 SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * TODO:
+ *   - sizeof(struct inode) charge
+ *   = tcp_mem_schedule() feedback based on ub limits
+ *   + measures so that one socket won't exhaust all send buffers,
+ *     see bug in bugzilla
+ *   = sk->socket check for NULL in snd_wakeups
+ *     (tcp_write_space checks for NULL itself)
+ *   + in tcp_close(), orphaned socket abortion should be based on ubc
+ *     resources (same in tcp_out_of_resources)
+ *     Beancounter should also have separate orphaned socket counter...
+ *   + for rcv, in-order segment should be accepted
+ *     if only barrier is exceeded
+ *   = tcp_rmem_schedule() feedback based on ub limits
+ *   - repair forward_alloc mechanism for receive buffers
+ *     Its idea is that some buffer space is pre-charged so that receive fast
+ *     path doesn't need to take spinlocks and do other heavy stuff
+ *   + tcp_prune_queue actions based on ub limits
+ *   + window adjustments depending on available buffers for receive
+ *   - window adjustments depending on available buffers for send
+ *   + race around usewreserv
+ *   + avoid allocating new page for each tiny-gram, see letter from ANK
+ *   + rename ub_sock_lock
+ *   + sk->sleep wait queue probably can be used for all wakeups, and
+ *     sk->ub_wait is unnecessary
+ *   + for UNIX sockets, the current algorithm will lead to
+ *     UB_UNIX_MINBUF-sized messages only for non-blocking case
+ *   - charge for af_packet sockets
+ *   + all datagram sockets should be charged to NUMUNIXSOCK
+ *   - we do not charge for skb copies and clones staying in device queues
+ *   + live-lock if number of sockets is big and buffer limits are small
+ *     [diff-ubc-dbllim3]
+ *   - check that multiple readers/writers on the same socket won't cause fatal
+ *     consequences
+ *   - check allocation/charge orders
+ *   + There is potential problem with callback_lock.  In *snd_wakeup we take
+ *     beancounter first, in sock_def_error_report - callback_lock first,
+ *     then beancounter.  This is not a problem if callback_lock is taken
+ *     readonly, but anyway...
+ *   - SKB_CHARGE_SIZE doesn't include the space wasted by slab allocator
+ * General kernel problems:
+ *   - in tcp_sendmsg(), if allocation fails, non-blocking sockets with ASYNC
+ *     notification won't get signals
+ *   - datagram_poll looks racy
+ *
+ */
+
+#include <linux/net.h>
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+#include <linux/socket.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#include <net/sock.h>
+#include <net/tcp.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_net.h>
+#include <ub/ub_debug.h>
+
+/* by some reason it is not used currently */
+#define UB_SOCK_MAINTAIN_WMEMPRESSURE	0
+
+
+/* Skb truesize definition. Bad place. Den */
+
+static inline int skb_chargesize_head(struct sk_buff *skb)
+{
+	return skb_charge_size(skb->end - skb->head +
+				sizeof(struct skb_shared_info));
+}
+
+int skb_charge_fullsize(struct sk_buff *skb)
+{
+	int chargesize;
+	struct sk_buff *skbfrag;
+
+	chargesize = skb_chargesize_head(skb) +
+		PAGE_SIZE * skb_shinfo(skb)->nr_frags;
+	if (likely(skb_shinfo(skb)->frag_list == NULL))
+		return chargesize;
+	for (skbfrag = skb_shinfo(skb)->frag_list;
+	     skbfrag != NULL;
+	     skbfrag = skbfrag->next) {
+		chargesize += skb_charge_fullsize(skbfrag);
+	}
+	return chargesize;
+}
+EXPORT_SYMBOL(skb_charge_fullsize);
+
+static int ub_sock_makewreserv_locked(struct sock *sk,
+		int bufid, unsigned long size);
+
+int ub_too_many_orphans(struct sock *sk, int count)
+{
+	struct user_beancounter *ub;
+
+	if (sock_has_ubc(sk)) {
+		for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent);
+		if (count >= ub->ub_parms[UB_NUMTCPSOCK].barrier >> 2)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Queueing
+ */
+
+static void ub_sock_snd_wakeup(struct user_beancounter *ub)
+{
+	struct list_head *p;
+	struct sock *sk;
+	struct sock_beancounter *skbc;
+	struct socket *sock;
+	unsigned long added;
+
+	while (!list_empty(&ub->ub_other_sk_list)) {
+		p = ub->ub_other_sk_list.next;
+		skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
+		sk = skbc_sock(skbc);
+
+		added = 0;
+		sock = sk->sk_socket;
+		if (sock == NULL) {
+			/* sk being destroyed */
+			list_del_init(&skbc->ub_sock_list);
+			continue;
+		}
+
+		ub_debug(UBD_NET_SLEEP,
+				"Checking queue, waiting %lu, reserv %lu\n",
+				skbc->ub_waitspc, skbc->poll_reserv);
+		added = -skbc->poll_reserv;
+		if (ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF,
+					skbc->ub_waitspc))
+			break;
+		added += skbc->poll_reserv;
+
+		list_del_init(&skbc->ub_sock_list);
+
+		/*
+		 * See comments in ub_tcp_snd_wakeup.
+		 * Locking note: both unix_write_space and
+		 * sock_def_write_space take callback_lock themselves.
+		 * We take it here just to be on the safe side and to
+		 * act the same way as ub_tcp_snd_wakeup does.
+		 */
+		sock_hold(sk);
+		read_lock(&sk->sk_callback_lock);
+		spin_unlock(&ub->ub_lock);
+
+		sk->sk_write_space(sk);
+		read_unlock(&sk->sk_callback_lock);
+
+		if (skbc->ub != ub && added)
+			charge_beancounter_notop(skbc->ub,
+				       	UB_OTHERSOCKBUF, added);
+		sock_put(sk);
+
+		spin_lock(&ub->ub_lock);
+	}
+}
+
+static void ub_tcp_snd_wakeup(struct user_beancounter *ub)
+{
+	struct list_head *p;
+	struct sock *sk;
+	struct sock_beancounter *skbc;
+	struct socket *sock;
+	unsigned long added;
+
+	while (!list_empty(&ub->ub_tcp_sk_list)) {
+		p = ub->ub_tcp_sk_list.next;
+		skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
+		sk = skbc_sock(skbc);
+
+		added = 0;
+		sock = sk->sk_socket;
+		if (sock == NULL) {
+			/* sk being destroyed */
+			list_del_init(&skbc->ub_sock_list);
+			continue;
+		}
+
+		ub_debug(UBD_NET_SLEEP,
+				"Checking queue, waiting %lu, reserv %lu\n",
+				skbc->ub_waitspc, skbc->poll_reserv);
+		added = -skbc->poll_reserv;
+		if (ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF,
+					skbc->ub_waitspc))
+			break;
+		added += skbc->poll_reserv;
+
+		list_del_init(&skbc->ub_sock_list);
+
+		/*
+		 * Send async notifications and wake up.
+		 * Locking note: we get callback_lock here because
+		 * tcp_write_space is over-optimistic about calling context
+		 * (socket lock is presumed).  So we get the lock here although
+		 * it belongs to the callback.
+		 */
+		sock_hold(sk);
+		read_lock(&sk->sk_callback_lock);
+		spin_unlock(&ub->ub_lock);
+
+		sk->sk_write_space(sk);
+		read_unlock(&sk->sk_callback_lock);
+
+		if (skbc->ub != ub && added)
+			charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, added);
+		sock_put(sk);
+
+		spin_lock(&ub->ub_lock);
+	}
+}
+
+int ub_sock_snd_queue_add(struct sock *sk, int res, unsigned long size)
+{
+	unsigned long flags;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long added_reserv;
+
+	if (!sock_has_ubc(sk))
+		return 0;
+
+	skbc = sock_bc(sk);
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub_debug(UBD_NET_SLEEP, "attempt to charge for %lu\n", size);
+	added_reserv = -skbc->poll_reserv;
+	if (!ub_sock_makewreserv_locked(sk, res, size)) {
+		/*
+		 * It looks a bit hackish, but it is compatible with both
+		 * wait_for_xx_ubspace and poll.
+		 * This __set_current_state is equivalent to a wakeup event
+		 * right after spin_unlock_irqrestore.
+		 */
+		__set_current_state(TASK_RUNNING);
+		added_reserv += skbc->poll_reserv;
+		spin_unlock_irqrestore(&ub->ub_lock, flags);
+		if (added_reserv)
+			charge_beancounter_notop(skbc->ub, res, added_reserv);
+		return 0;
+	}
+
+	ub_debug(UBD_NET_SLEEP, "Adding sk to queue\n");
+	skbc->ub_waitspc = size;
+	if (!list_empty(&skbc->ub_sock_list)) {
+		ub_debug(UBD_NET_SOCKET,
+				"re-adding socket to beancounter %p.\n", ub);
+		goto out;
+	}
+
+	switch (res) {
+		case UB_TCPSNDBUF:
+			list_add_tail(&skbc->ub_sock_list,
+					&ub->ub_tcp_sk_list);
+			break;
+		case UB_OTHERSOCKBUF:
+			list_add_tail(&skbc->ub_sock_list,
+					&ub->ub_other_sk_list);
+			break;
+		default:
+			BUG();
+	}
+out:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return -ENOMEM;
+}
+
+long ub_sock_wait_for_space(struct sock *sk, long timeo, unsigned long size)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(sk->sk_sleep, &wait);
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size))
+			break;
+
+		if (sk->sk_shutdown & SEND_SHUTDOWN)
+			break;
+		if (sk->sk_err)
+			break;
+		ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, size);
+		timeo = schedule_timeout(timeo);
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return timeo;
+}
+
+void ub_sock_sndqueuedel(struct sock *sk)
+{
+	struct user_beancounter *ub;
+	struct sock_beancounter *skbc;
+	unsigned long flags;
+
+	if (!sock_has_ubc(sk))
+		return;
+	skbc = sock_bc(sk);
+
+	/* race with write_space callback of other socket */
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	list_del_init(&skbc->ub_sock_list);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+/*
+ * Helpers
+ */
+
+static inline void __ub_skb_set_charge(struct sk_buff *skb, struct sock *sk,
+		       unsigned long size, int resource)
+{
+	skb_bc(skb)->ub = sock_bc(sk)->ub;
+	skb_bc(skb)->charged = size;
+	skb_bc(skb)->resource = resource;
+}
+
+void ub_skb_set_charge(struct sk_buff *skb, struct sock *sk,
+		       unsigned long size, int resource)
+{
+	if (!sock_has_ubc(sk))
+		return;
+
+	if (sock_bc(sk)->ub == NULL)
+		BUG();
+
+	__ub_skb_set_charge(skb, sk, size, resource);
+
+	/* Ugly. Ugly. Skb in sk writequeue can live without ref to sk */
+	if (skb->sk == NULL)
+		skb->sk = sk;
+}
+
+static inline void ub_skb_set_uncharge(struct sk_buff *skb)
+{
+	skb_bc(skb)->ub = NULL;
+	skb_bc(skb)->charged = 0;
+	skb_bc(skb)->resource = 0;
+}
+
+static void ub_update_rmem_thres(struct sock_beancounter *skub)
+{
+	struct user_beancounter *ub;
+
+	if (skub && skub->ub) {
+		for (ub = skub->ub; ub->parent != NULL; ub = ub->parent);
+		ub->ub_rmem_thres = ub->ub_parms[UB_TCPRCVBUF].barrier /
+			(ub->ub_parms[UB_NUMTCPSOCK].held + 1);
+	}
+}
+
+static inline void ub_sock_wcharge_dec(struct sock *sk,
+		unsigned long chargesize)
+{
+	/* The check sk->sk_family != PF_NETLINK is made as the skb is
+	 * queued to the kernel end of socket while changed to the user one.
+	 * Den */
+	if (unlikely(sock_bc(sk)->ub_wcharged) && sk->sk_family != PF_NETLINK) {
+		if (sock_bc(sk)->ub_wcharged > chargesize)
+			sock_bc(sk)->ub_wcharged -= chargesize;
+		else
+			sock_bc(sk)->ub_wcharged = 0;
+	}
+}
+
+/*
+ * Charge socket number
+ */
+
+static inline void sk_alloc_beancounter(struct sock *sk)
+{
+	struct sock_beancounter *skbc;
+
+	skbc = sock_bc(sk);
+	memset(skbc, 0, sizeof(struct sock_beancounter));
+}
+
+static inline void sk_free_beancounter(struct sock *sk)
+{
+}
+
+static int __sock_charge(struct sock *sk, int res)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *cub, *ub;
+	unsigned long added_reserv, added_forw;
+	unsigned long flags;
+
+	cub = get_exec_ub();
+	if (unlikely(cub == NULL))
+		return 0;
+
+	sk_alloc_beancounter(sk);
+	skbc = sock_bc(sk);
+	INIT_LIST_HEAD(&skbc->ub_sock_list);
+
+	for (ub = cub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (unlikely(__charge_beancounter_locked(ub, res, 1, UB_HARD) < 0))
+		goto out_limit;
+
+	added_reserv = 0;
+	added_forw = 0;
+	if (res == UB_NUMTCPSOCK) {
+		added_reserv = skb_charge_size(MAX_TCP_HEADER +
+				1500 - sizeof(struct iphdr) -
+					sizeof(struct tcphdr));
+		added_reserv *= 4;
+		ub->ub_parms[UB_TCPSNDBUF].held += added_reserv;
+		if (!ub_barrier_farsz(ub, UB_TCPSNDBUF)) {
+			ub->ub_parms[UB_TCPSNDBUF].held -= added_reserv;
+			added_reserv = 0;
+		}
+		skbc->poll_reserv = added_reserv;
+
+		added_forw = SK_STREAM_MEM_QUANTUM * 4;
+		ub->ub_parms[UB_TCPRCVBUF].held += added_forw;
+		if (!ub_barrier_farsz(ub, UB_TCPRCVBUF)) {
+			ub->ub_parms[UB_TCPRCVBUF].held -= added_forw;
+			added_forw = 0;
+		}
+		skbc->forw_space = added_forw;
+	}
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	charge_beancounter_notop(cub, res, 1);
+	if (added_reserv)
+		charge_beancounter_notop(cub, UB_TCPSNDBUF, added_reserv);
+	if (added_forw)
+		charge_beancounter_notop(cub, UB_TCPRCVBUF, added_forw);
+
+	skbc->ub = get_beancounter(cub);
+	return 0;
+
+out_limit:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	sk_free_beancounter(sk);
+	return -ENOMEM;
+}
+
+int ub_tcp_sock_charge(struct sock *sk)
+{
+	int ret;
+
+	ret = __sock_charge(sk, UB_NUMTCPSOCK);
+	ub_update_rmem_thres(sock_bc(sk));
+
+	return ret;
+}
+
+int ub_other_sock_charge(struct sock *sk)
+{
+	return __sock_charge(sk, UB_NUMOTHERSOCK);
+}
+
+EXPORT_SYMBOL(ub_other_sock_charge);
+
+int ub_sock_charge(struct sock *sk, int family, int type)
+{
+	return (IS_TCP_SOCK(family, type) ?
+			ub_tcp_sock_charge(sk) : ub_other_sock_charge(sk));
+}
+
+/*
+ * Uncharge socket number
+ */
+
+void ub_sock_uncharge(struct sock *sk)
+{
+	int is_tcp_sock;
+	unsigned long flags;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long reserv, forw;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+
+	is_tcp_sock = IS_TCP_SOCK(sk->sk_family, sk->sk_type);
+	skbc = sock_bc(sk);
+	ub_debug(UBD_NET_SOCKET, "Calling ub_sock_uncharge on %p\n", sk);
+
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (!list_empty(&skbc->ub_sock_list)) {
+		ub_debug(UBD_NET_SOCKET,
+			 "ub_sock_uncharge: removing from ub(%p) queue.\n",
+			 skbc);
+		list_del_init(&skbc->ub_sock_list);
+	}
+
+	reserv = skbc->poll_reserv;
+	forw = skbc->forw_space;
+	__uncharge_beancounter_locked(ub,
+			(is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
+			reserv);
+	if (forw)
+		__uncharge_beancounter_locked(ub,
+				(is_tcp_sock ? UB_TCPRCVBUF : UB_DGRAMRCVBUF),
+				forw);
+	__uncharge_beancounter_locked(ub,
+			(is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
+
+	ub_sock_wcharge_dec(sk, reserv);
+	if (unlikely(skbc->ub_wcharged))
+		printk(KERN_WARNING
+		       "ub_sock_uncharge: wch=%lu for ub %p (%d).\n",
+		       skbc->ub_wcharged, skbc->ub, skbc->ub->ub_uid);
+	skbc->poll_reserv = 0;
+	skbc->forw_space = 0;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	uncharge_beancounter_notop(skbc->ub,
+			(is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
+			reserv);
+	if (forw)
+		uncharge_beancounter_notop(skbc->ub,
+				(is_tcp_sock ? UB_TCPRCVBUF : UB_DGRAMRCVBUF),
+				forw);
+	uncharge_beancounter_notop(skbc->ub,
+			(is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
+
+	put_beancounter(skbc->ub);
+	sk_free_beancounter(sk);
+}
+
+/*
+ * Special case for netlink_dump - (un)charges precalculated size
+ */
+
+int ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk)
+{
+	int ret;
+	unsigned long chargesize;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	chargesize = skb_charge_fullsize(skb);
+	ret = charge_beancounter(sock_bc(sk)->ub,
+			UB_DGRAMRCVBUF, chargesize, UB_HARD);
+	if (ret < 0)
+		return ret;
+	ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF);
+	return ret;
+}
+
+/*
+ * Poll reserve accounting
+ *
+ * This is the core of socket buffer management (along with queueing/wakeup
+ * functions.  The rest of buffer accounting either call these functions, or
+ * repeat parts of their logic for some simpler cases.
+ */
+
+static int ub_sock_makewreserv_locked(struct sock *sk,
+		int bufid, unsigned long size)
+{
+	unsigned long wcharge_added;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+
+	skbc = sock_bc(sk);
+	if (skbc->poll_reserv >= size) /* no work to be done */
+		goto out;
+
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+	ub->ub_parms[bufid].held += size - skbc->poll_reserv;
+
+	wcharge_added = 0;
+	/*
+	 * Logic:
+	 *  1) when used memory hits barrier, we set wmem_pressure;
+	 *     wmem_pressure is reset under barrier/2;
+	 *     between barrier/2 and barrier we limit per-socket buffer growth;
+	 *  2) each socket is guaranteed to get (limit-barrier)/maxsockets
+	 *     calculated on the base of memory eaten after the barrier is hit
+	 */
+	skbc = sock_bc(sk);
+#if UB_SOCK_MAINTAIN_WMEMPRESSURE
+	if (!ub_hfbarrier_hit(ub, bufid)) {
+		if (ub->ub_wmem_pressure)
+			ub_debug(UBD_NET_SEND, "makewres: pressure -> 0 "
+				"sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
+				sk, size, skbc->poll_reserv,
+				ub->ub_parms[bufid].held,
+				skbc->ub_wcharged, sk->sk_sndbuf);
+		ub->ub_wmem_pressure = 0;
+	}
+#endif
+	if (ub_barrier_hit(ub, bufid)) {
+#if UB_SOCK_MAINTAIN_WMEMPRESSURE
+		if (!ub->ub_wmem_pressure)
+			ub_debug(UBD_NET_SEND, "makewres: pressure -> 1 "
+				"sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
+				sk, size, skbc->poll_reserv,
+				ub->ub_parms[bufid].held,
+				skbc->ub_wcharged, sk->sk_sndbuf);
+		ub->ub_wmem_pressure = 1;
+#endif
+		if (sk->sk_family == PF_NETLINK)
+			goto unroll;
+		wcharge_added = size - skbc->poll_reserv;
+		skbc->ub_wcharged += wcharge_added;
+		if (skbc->ub_wcharged * ub->ub_parms[bid2sid(bufid)].limit +
+				ub->ub_parms[bufid].barrier >
+					ub->ub_parms[bufid].limit)
+			goto unroll_wch;
+	}
+	if (ub->ub_parms[bufid].held > ub->ub_parms[bufid].limit)
+		goto unroll;
+
+	ub_adjust_maxheld(ub, bufid);
+	skbc->poll_reserv = size;
+out:
+	return 0;
+
+unroll_wch:
+	skbc->ub_wcharged -= wcharge_added;
+unroll:
+	ub_debug(UBD_NET_SEND,
+			"makewres: deny "
+			"sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
+			sk, size, skbc->poll_reserv, ub->ub_parms[bufid].held,
+			skbc->ub_wcharged, sk->sk_sndbuf);
+	ub->ub_parms[bufid].failcnt++;
+	ub->ub_parms[bufid].held -= size - skbc->poll_reserv;
+
+	if (sk->sk_socket != NULL) {
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+	}
+	return -ENOMEM;
+}
+
+int ub_sock_make_wreserv(struct sock *sk, int bufid, unsigned long size)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long flags;
+	unsigned long added_reserv;
+	int err;
+
+	skbc = sock_bc(sk);
+
+	/*
+	 * This function provides that there is sufficient reserve upon return
+	 * only if sk has only one user.  We can check poll_reserv without
+	 * serialization and avoid locking if the reserve already exists.
+	 */
+	if (unlikely(!sock_has_ubc(sk)) || likely(skbc->poll_reserv >= size))
+		return 0;
+
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	added_reserv = -skbc->poll_reserv;
+	err = ub_sock_makewreserv_locked(sk, bufid, size);
+	added_reserv += skbc->poll_reserv;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	if (added_reserv)
+		charge_beancounter_notop(skbc->ub, bufid, added_reserv);
+
+	return err;
+}
+
+int ub_sock_get_wreserv(struct sock *sk, int bufid, unsigned long size)
+{
+	struct sock_beancounter *skbc;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	/* optimize for the case if socket has sufficient reserve */
+	ub_sock_make_wreserv(sk, bufid, size);
+	skbc = sock_bc(sk);
+	if (likely(skbc->poll_reserv >= size)) {
+		skbc->poll_reserv -= size;
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+static void ub_sock_do_ret_wreserv(struct sock *sk, int bufid,
+		unsigned long size, unsigned long ressize)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long extra;
+	unsigned long flags;
+
+	skbc = sock_bc(sk);
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+
+	extra = 0;
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	skbc->poll_reserv += size;
+	if (skbc->poll_reserv > ressize) {
+		extra = skbc->poll_reserv - ressize;
+		ub_sock_wcharge_dec(sk, extra);
+		skbc->poll_reserv = ressize;
+
+		__uncharge_beancounter_locked(ub, bufid, extra);
+		if (bufid == UB_TCPSNDBUF)
+			ub_tcp_snd_wakeup(ub);
+		else
+			ub_sock_snd_wakeup(ub);
+	}
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	if (extra)
+		uncharge_beancounter_notop(skbc->ub, bufid, extra);
+}
+
+void ub_sock_ret_wreserv(struct sock *sk, int bufid,
+		unsigned long size, unsigned long ressize)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+
+	skbc = sock_bc(sk);
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+	/* check if the reserve can be kept */
+	if (ub_barrier_farsz(ub, bufid)) {
+		skbc->poll_reserv += size;
+		return;
+	}
+	ub_sock_do_ret_wreserv(sk, bufid, size, ressize);
+}
+
+/*
+ * UB_DGRAMRCVBUF
+ */
+
+int ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb)
+{
+	unsigned long chargesize;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	chargesize = skb_charge_fullsize(skb);
+	if (charge_beancounter(sock_bc(sk)->ub, UB_DGRAMRCVBUF,
+				 chargesize, UB_HARD))
+		return -ENOMEM;
+
+	ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF);
+	return 0;
+}
+
+EXPORT_SYMBOL(ub_sockrcvbuf_charge);
+
+static void ub_sockrcvbuf_uncharge(struct sk_buff *skb)
+{
+	uncharge_beancounter(skb_bc(skb)->ub, UB_DGRAMRCVBUF,
+			     skb_bc(skb)->charged);
+	ub_skb_set_uncharge(skb);
+}
+EXPORT_SYMBOL(ub_tcprcvbuf_charge_forced);
+
+/*
+ * UB_TCPRCVBUF
+ */
+
+int ub_sock_tcp_chargerecv(struct sock *sk, struct sk_buff *skb,
+			    enum ub_severity strict)
+{
+	int retval;
+	unsigned long flags;
+	struct user_beancounter *ub;
+	struct sock_beancounter *skbc;
+	unsigned long chargesize;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+	skbc = sock_bc(sk);
+
+	chargesize = skb_charge_fullsize(skb);
+	if (likely(skbc->forw_space >= chargesize)) {
+		skbc->forw_space -= chargesize;
+		__ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF);
+		return 0;
+	}
+
+	/*
+	 * Memory pressure reactions:
+	 *  1) set UB_RMEM_KEEP (clearing UB_RMEM_EXPAND)
+	 *  2) set UB_RMEM_SHRINK and tcp_clamp_window()
+	 *     tcp_collapse_queues() if rmem_alloc > rcvbuf
+	 *  3) drop OFO, tcp_purge_ofo()
+	 *  4) drop all.
+	 * Currently, we do #2 and #3 at once (which means that current
+	 * collapsing of OFO queue in tcp_collapse_queues() is a waste of time,
+	 * for example...)
+	 * On memory pressure we jump from #0 to #3, and when the pressure
+	 * subsides, to #1.
+	 */
+	retval = 0;
+	for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_parms[UB_TCPRCVBUF].held += chargesize;
+	if (ub->ub_parms[UB_TCPRCVBUF].held >
+			ub->ub_parms[UB_TCPRCVBUF].barrier &&
+			strict != UB_FORCE)
+		goto excess;
+	ub_adjust_maxheld(ub, UB_TCPRCVBUF);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+out:
+	if (retval == 0) {
+		charge_beancounter_notop(sock_bc(sk)->ub, UB_TCPRCVBUF,
+				chargesize);
+		ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF);
+	}
+	return retval;
+
+excess:
+	ub->ub_rmem_pressure = UB_RMEM_SHRINK;
+	if (strict == UB_HARD)
+		retval = -ENOMEM;
+	if (ub->ub_parms[UB_TCPRCVBUF].held > ub->ub_parms[UB_TCPRCVBUF].limit)
+		retval = -ENOMEM;
+	/*
+	 * We try to leave numsock*maxadvmss as a reserve for sockets not
+	 * queueing any data yet (if the difference between the barrier and the
+	 * limit is enough for this reserve).
+	 */
+	if (ub->ub_parms[UB_TCPRCVBUF].held +
+			ub->ub_parms[UB_NUMTCPSOCK].limit * ub->ub_maxadvmss
+			> ub->ub_parms[UB_TCPRCVBUF].limit &&
+			atomic_read(&sk->sk_rmem_alloc))
+		retval = -ENOMEM;
+	if (retval) {
+		ub->ub_parms[UB_TCPRCVBUF].held -= chargesize;
+		ub->ub_parms[UB_TCPRCVBUF].failcnt++;
+	}
+	ub_adjust_maxheld(ub, UB_TCPRCVBUF);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	goto out;
+}
+EXPORT_SYMBOL(ub_sock_tcp_chargerecv);
+
+static void ub_tcprcvbuf_uncharge(struct sk_buff *skb)
+{
+	unsigned long flags;
+	unsigned long held, bar;
+	int prev_pres;
+	struct user_beancounter *ub;
+
+	for (ub = skb_bc(skb)->ub; ub->parent != NULL; ub = ub->parent);
+	if (ub_barrier_farsz(ub, UB_TCPRCVBUF)) {
+		sock_bc(skb->sk)->forw_space += skb_bc(skb)->charged;
+		ub_skb_set_uncharge(skb);
+		return;
+	}
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (ub->ub_parms[UB_TCPRCVBUF].held < skb_bc(skb)->charged) {
+		printk(KERN_ERR "Uncharging %d for tcprcvbuf of %p with %lu\n",
+				skb_bc(skb)->charged,
+				ub, ub->ub_parms[UB_TCPRCVBUF].held);
+		/* ass-saving bung */
+		skb_bc(skb)->charged = ub->ub_parms[UB_TCPRCVBUF].held;
+	}
+	ub->ub_parms[UB_TCPRCVBUF].held -= skb_bc(skb)->charged;
+	held = ub->ub_parms[UB_TCPRCVBUF].held;
+	bar = ub->ub_parms[UB_TCPRCVBUF].barrier;
+	prev_pres = ub->ub_rmem_pressure;
+	if (held <= bar - (bar >> 2))
+		ub->ub_rmem_pressure = UB_RMEM_EXPAND;
+	else if (held <= bar)
+		ub->ub_rmem_pressure = UB_RMEM_KEEP;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	uncharge_beancounter_notop(skb_bc(skb)->ub, UB_TCPRCVBUF,
+			skb_bc(skb)->charged);
+	ub_skb_set_uncharge(skb);
+}
+
+
+/*
+ * UB_OTHERSOCKBUF and UB_TCPSNDBUF
+ */
+
+static void ub_socksndbuf_uncharge(struct sk_buff *skb)
+{
+	unsigned long flags;
+	struct user_beancounter *ub, *cub;
+	unsigned long chargesize;
+
+	cub = skb_bc(skb)->ub;
+	for (ub = cub; ub->parent != NULL; ub = ub->parent);
+	chargesize = skb_bc(skb)->charged;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_OTHERSOCKBUF, chargesize);
+	if (skb->sk != NULL && sock_has_ubc(skb->sk))
+		ub_sock_wcharge_dec(skb->sk, chargesize);
+	ub_sock_snd_wakeup(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	uncharge_beancounter_notop(cub, UB_OTHERSOCKBUF, chargesize);
+	ub_skb_set_uncharge(skb);
+}
+
+/* expected to be called under socket lock */
+static void ub_tcpsndbuf_uncharge(struct sk_buff *skb)
+{
+	/*
+	 * ub_sock_ret_wreserv call is abused here, we just want to uncharge
+	 * skb size.  However, to reduce duplication of the code doing
+	 * ub_hfbarrier_hit check, ub_wcharged reduction, and wakeup we call
+	 * a function that already does all of this.  2006/04/27  SAW
+	 */
+	ub_sock_ret_wreserv(skb->sk, UB_TCPSNDBUF, skb_bc(skb)->charged,
+			sock_bc(skb->sk)->poll_reserv);
+	ub_skb_set_uncharge(skb);
+}
+
+void ub_skb_uncharge(struct sk_buff *skb)
+{
+	switch (skb_bc(skb)->resource) {
+		case UB_TCPSNDBUF:
+			ub_tcpsndbuf_uncharge(skb);
+			break;
+		case UB_TCPRCVBUF:
+			ub_tcprcvbuf_uncharge(skb);
+			break;
+		case UB_DGRAMRCVBUF:
+			ub_sockrcvbuf_uncharge(skb);
+			break;
+		case UB_OTHERSOCKBUF:
+			ub_socksndbuf_uncharge(skb);
+			break;
+	}
+}
+
+EXPORT_SYMBOL(ub_skb_uncharge);	/* due to skb_orphan()/conntracks */
+
+/*
+ * Other sock reserve management
+ */
+
+int ub_sock_getwres_other(struct sock *sk, unsigned long size)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long flags;
+	unsigned long added_reserv;
+	int err;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	/*
+	 * Nothing except beancounter lock protects skbc->poll_reserv.
+	 * So, take the lock and do the job.
+	 * Dances with added_reserv repeat ub_sock_make_wreserv.
+	 */
+	skbc = sock_bc(sk);
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	added_reserv = -skbc->poll_reserv;
+	err = ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF, size);
+	added_reserv += skbc->poll_reserv;
+	if (!err)
+		skbc->poll_reserv -= size;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	if (added_reserv)
+		charge_beancounter_notop(skbc->ub, UB_OTHERSOCKBUF, added_reserv);
+
+	return err;
+}
+EXPORT_SYMBOL(ub_sock_getwres_other);
+
+void ub_sock_retwres_other(struct sock *sk,
+		unsigned long size, unsigned long ressize)
+{
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+
+	ub_sock_do_ret_wreserv(sk, UB_OTHERSOCKBUF, size, ressize);
+}
+
+/*
+ * TCP send buffers accounting. Paged part
+ */
+
+int ub_sock_tcp_chargepage(struct sock *sk)
+{
+	struct sock_beancounter *skbc;
+	unsigned long extra;
+	int err;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	skbc = sock_bc(sk);
+	ub_sock_make_wreserv(sk, UB_TCPSNDBUF, PAGE_SIZE);
+	if (likely(skbc->poll_reserv >= PAGE_SIZE)) {
+		skbc->poll_reserv -= PAGE_SIZE;
+		return 0;
+	}
+
+	/*
+	 * Ok, full page is not available.
+	 * However, this function must succeed if poll previously indicated
+	 * that write is possible.  We better make a forced charge here
+	 * than reserve a whole page in poll.
+	 */
+	err = ub_sock_make_wreserv(sk, UB_TCPSNDBUF, SOCK_MIN_UBCSPACE);
+	if (unlikely(err < 0))
+		goto out;
+	if (skbc->poll_reserv < PAGE_SIZE) {
+		extra = PAGE_SIZE - skbc->poll_reserv;
+		err = charge_beancounter(skbc->ub, UB_TCPSNDBUF, extra,
+				UB_FORCE);
+		if (err < 0)
+			goto out;
+		skbc->poll_reserv += extra;
+	}
+	skbc->poll_reserv -= PAGE_SIZE;
+	return 0;
+
+out:
+	return err;
+}
+
+void ub_sock_tcp_detachpage(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+
+	/* The page is just detached from socket. The last skb in queue
+	   with paged part holds a reference to it */
+	skb = skb_peek_tail(&sk->sk_write_queue);
+	if (skb == NULL) {
+	   	/* If the queue is empty - all data is sent and page is about
+		   to be freed */
+		ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, PAGE_SIZE,
+				sock_bc(sk)->poll_reserv);
+	} else {
+		/* Last skb is a good approximation for a last skb with
+		   paged part */
+		skb_bc(skb)->charged += PAGE_SIZE;
+	}
+}
+
+/*
+ * TCPSNDBUF charge functions below are called in the following cases:
+ *  - sending of SYN, SYN-ACK, FIN, the latter charge is forced by
+ *    some technical reasons in TCP code;
+ *  - fragmentation of TCP packets.
+ * These functions are allowed but not required to use poll_reserv.
+ * Originally, these functions didn't do that, since it didn't make
+ * any sense.  Now, since poll_reserv now has a function of general reserve,
+ * they use it.
+ */
+int ub_sock_tcp_chargesend(struct sock *sk, struct sk_buff *skb,
+			    enum ub_severity strict)
+{
+	int ret;
+	unsigned long chargesize;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long flags;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	skbc = sock_bc(sk);
+	chargesize = skb_charge_fullsize(skb);
+	if (likely(skbc->poll_reserv >= chargesize)) {
+		skbc->poll_reserv -= chargesize;
+		__ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
+		/* XXX hack, see ub_skb_set_charge */
+		skb->sk = sk;
+		return 0;
+	}
+
+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ret = __charge_beancounter_locked(ub, UB_TCPSNDBUF,
+			chargesize, strict);
+	/*
+	 * Note: this check is not equivalent of the corresponding check
+	 * in makewreserv.  It's similar in spirit, but an equivalent check
+	 * would be too long and complicated here.
+	 */
+	if (!ret && ub_barrier_hit(ub, UB_TCPSNDBUF))
+		skbc->ub_wcharged += chargesize;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	if (likely(!ret)) {
+		charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, chargesize);
+		ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(ub_sock_tcp_chargesend);
+
+void ub_sock_tcp_unchargesend(struct sock *sk, unsigned long size)
+{
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+	/* see ub_tcpsndbuf_uncharge */
+	ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, size, sock_bc(sk)->poll_reserv);
+}
+
+/*
+ * Initialization
+ */
+
+int __init skbc_cache_init(void)
+{
+	return 0;
+}
diff -Nurap linux-2.6.9-100.orig/kernel/ub/ub_oom.c linux-2.6.9-ve023stab054/kernel/ub/ub_oom.c
--- linux-2.6.9-100.orig/kernel/ub/ub_oom.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/ub_oom.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,93 @@
+/*
+ *  kernel/ub/ub_oom.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include <asm/page.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_misc.h>
+#include <ub/ub_hash.h>
+
+static inline long ub_current_overdraft(struct user_beancounter *ub)
+{
+	return ub->ub_parms[UB_OOMGUARPAGES].held +
+		((ub->ub_parms[UB_KMEMSIZE].held
+		  + ub->ub_parms[UB_TCPSNDBUF].held
+		  + ub->ub_parms[UB_TCPRCVBUF].held
+		  + ub->ub_parms[UB_OTHERSOCKBUF].held
+		  + ub->ub_parms[UB_DGRAMRCVBUF].held)
+		 >> PAGE_SHIFT)	- ub->ub_parms[UB_OOMGUARPAGES].barrier;
+}
+
+/*
+ * Select an user_beancounter to find task inside it to be killed.
+ * Select the beancounter with the biggest excess of resource usage
+ * to kill a process belonging to that beancounter later.  Returns
+ * NULL if there are no beancounters with such excess.
+ */
+
+struct user_beancounter *ub_select_worst(long *ub_maxover)
+{
+	struct user_beancounter *ub, *walkp;
+	unsigned long flags;
+	int i;
+
+	*ub_maxover = 0;
+	ub = NULL;
+	spin_lock_irqsave(&ub_hash_lock, flags);
+
+	for_each_beancounter(i, walkp) {
+		long ub_overdraft;
+
+		if (walkp->parent != NULL)
+			continue;
+		if (walkp->ub_oom_noproc)
+			continue;
+		ub_overdraft = ub_current_overdraft(walkp);
+		if (ub_overdraft > *ub_maxover) {
+			ub = walkp;
+			*ub_maxover = ub_overdraft;
+		}
+	}
+	if (ub) {
+		get_beancounter(ub); ub->ub_oom_noproc = 1;
+	}
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	return ub;
+}
+
+void ub_oomkill_task(struct mm_struct * mm, struct user_beancounter *ub, 
+		long maxover)
+{
+	static struct ub_rate_info ri = { 5, 60*HZ };
+
+	/* increment is serialized with oom_generation_lock */
+	mm_ub(mm)->ub_parms[UB_OOMGUARPAGES].failcnt++;
+
+	if (ub_ratelimit(&ri))
+		show_mem();
+}
+
+void ub_clear_oom(void)
+{
+	unsigned long flags;
+	int i;
+	struct user_beancounter *walkp;
+
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	for_each_beancounter(i, walkp)
+		walkp->ub_oom_noproc = 0;
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+}
diff -Nurap linux-2.6.9-100.orig/kernel/ub/ub_page_bc.c linux-2.6.9-ve023stab054/kernel/ub/ub_page_bc.c
--- linux-2.6.9-100.orig/kernel/ub/ub_page_bc.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/ub_page_bc.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,457 @@
+/*
+ *  kernel/ub/ub_page_bc.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_vmpages.h>
+#include <ub/ub_page.h>
+
+static kmem_cache_t *pb_cachep;
+static spinlock_t pb_lock = SPIN_LOCK_UNLOCKED;
+static struct page_beancounter **pb_hash_table;
+static unsigned int pb_hash_mask;
+
+/*
+ * Auxiliary stuff
+ */
+
+static inline struct page_beancounter *next_page_pb(struct page_beancounter *p)
+{
+	return list_entry(p->page_list.next, struct page_beancounter,
+			page_list);
+}
+
+static inline struct page_beancounter *prev_page_pb(struct page_beancounter *p)
+{
+	return list_entry(p->page_list.prev, struct page_beancounter,
+			page_list);
+}
+
+/*
+ * Held pages manipulation
+ */
+static inline void set_held_pages(struct user_beancounter *bc)
+{
+	/* all three depend on ub_held_pages */
+	__ub_update_physpages(bc);
+	__ub_update_oomguarpages(bc);
+	__ub_update_privvm(bc);
+}
+
+static inline void do_dec_held_pages(struct user_beancounter *ub, int value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_held_pages -= value;
+	set_held_pages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+static void dec_held_pages(struct user_beancounter *ub, int value)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_dec_held_pages(ub, value);
+}
+
+static inline void do_inc_held_pages(struct user_beancounter *ub, int value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_held_pages += value;
+	set_held_pages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+static void inc_held_pages(struct user_beancounter *ub, int value)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_inc_held_pages(ub, value);
+}
+
+/*
+ * Alloc - free
+ */
+
+inline int pb_alloc(struct page_beancounter **pbc)
+{
+	*pbc = kmem_cache_alloc(pb_cachep, GFP_KERNEL);
+	if (*pbc != NULL)
+		(*pbc)->pb_magic = PB_MAGIC;
+	return (*pbc == NULL);
+}
+
+inline void pb_free(struct page_beancounter **pb)
+{
+	if (*pb != NULL) {
+		kmem_cache_free(pb_cachep, *pb);
+		*pb = NULL;
+	}
+}
+
+void pb_free_list(struct page_beancounter **p_pb)
+{
+	struct page_beancounter *list = *p_pb, *pb;
+	while (list) {
+		pb = list;
+	        list = list->next_hash;
+		pb_free(&pb);
+	}
+	*p_pb = NULL;
+}
+
+/*
+ * head -> <new objs> -> <old objs> -> ...
+ */
+static int __alloc_list(struct page_beancounter **head, int num)
+{
+	struct page_beancounter *pb;
+
+	while (num > 0) {
+		if (pb_alloc(&pb))
+			return -1;
+		pb->next_hash = *head;
+		*head = pb;
+		num--;
+	}
+
+	return num;
+}
+
+/* 
+ * Ensure that the list contains at least num elements.
+ * p_pb points to an initialized list, may be of the zero length. 
+ *
+ * mm->page_table_lock should be held
+ */
+int pb_alloc_list(struct page_beancounter **p_pb, int num,
+		struct mm_struct *mm)
+{
+	struct page_beancounter *list;
+
+	for (list = *p_pb; list != NULL && num; list = list->next_hash, num--);
+	if (!num)
+		return 0;
+
+	spin_unlock(&mm->page_table_lock);
+	/*
+	 *  *p_pb(after)       *p_pb (before)
+	 *     \                  \
+	 *     <new objs> -...-> <old objs> -> ...
+	 */
+	if (__alloc_list(p_pb, num) < 0)
+		goto nomem;
+	spin_lock(&mm->page_table_lock);
+	return 0;
+
+nomem:
+	spin_lock(&mm->page_table_lock);
+	pb_free_list(p_pb);
+	return -ENOMEM;
+}
+
+/*
+ * Hash routines
+ */
+
+static inline int pb_hash(struct user_beancounter *ub, struct page *page)
+{
+	return (page_to_pfn(page) + (ub->ub_uid << 10)) & pb_hash_mask;
+}
+
+/* pb_lock should be held */
+static inline void insert_pb(struct page_beancounter *p, struct page *page,
+		struct user_beancounter *ub, int hash)
+{
+	p->page = page;
+	p->ub = get_beancounter(ub);
+	p->next_hash = pb_hash_table[hash];
+	pb_hash_table[hash] = p;
+}
+
+/*
+ * Heart
+ */
+
+int pb_reserve_all(struct page_beancounter **pbs)
+{
+	int i, need_alloc;
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	need_alloc = 0;
+	for_each_beancounter(i, ub)
+		need_alloc++;
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	if (!__alloc_list(pbs, need_alloc))
+		return 0;
+
+	pb_free_list(pbs);
+	return -ENOMEM;
+}
+
+static inline int pb_check_magic(struct page *page)
+{
+	struct page_beancounter *pb;
+	int i, pbs_found;
+	struct page_beancounter **q;
+
+	pb = page_pbc(page);
+	if (likely(pb == NULL || pb->pb_magic == PB_MAGIC))
+		return 0;
+
+	page_pbc(page) = NULL;
+	printk(KERN_ERR "UBC: BUG: fixing bad page PBs for %p\n", page);
+	printk(KERN_ERR "  flags: 0x%0*lx mapping: %p mapcount: %d count: %d\n",
+		(int)(2*sizeof(page_flags_t)), (unsigned long)page->flags,
+		page->mapping, page_mapcount(page), page_count(page));
+	printk(KERN_ERR "  PB is %p magic %lu\n", pb, pb->pb_magic);
+	dump_stack();
+
+	pbs_found = 0;
+	for (i = 0; i < pb_hash_mask + 1; i++) {
+		q = &pb_hash_table[i];
+		while (1) {
+			pb = *q;
+			if (pb == NULL)
+				break;
+
+			if (pb->page == page) {
+				printk(KERN_ERR "  Leak PB %p for UB %p\n",
+						pb, pb->ub);
+				*q = pb->next_hash;
+				pbs_found++;
+			}
+			q = &pb->next_hash;
+		}
+	}
+
+	printk(KERN_ERR "  removed %d PBs from hash\n", pbs_found);
+	return -1;
+}
+
+int pb_add_ref(struct page *page, struct user_beancounter *bc,
+		struct page_beancounter **p_pb)
+{
+	int hash;
+	struct page_beancounter *p;
+	int shift;
+	struct page_beancounter *head;
+
+	if (bc == NULL)
+		return 0;
+	if (!PageAnon(page) && is_shmem_mapping(page->mapping))
+		return 0;
+
+	hash = pb_hash(bc, page);
+
+	spin_lock(&pb_lock);
+	for (p = pb_hash_table[hash];
+			p != NULL && (p->page != page || p->ub != bc);
+			p = p->next_hash);
+	if (p != NULL) {
+		/* 
+		 * This page is already associated with this beancounter,
+		 * increment the usage counter. 
+		 */
+		PB_COUNT_INC(p->refcount);
+		spin_unlock(&pb_lock);
+		return 0;
+	}
+
+	p = *p_pb;
+	if (p == NULL) {
+		spin_unlock(&pb_lock);
+		return -1;
+	}
+
+	if (pb_check_magic(page)) {
+		spin_unlock(&pb_lock);
+		return 0;
+	}
+
+	*p_pb = NULL;
+	insert_pb(p, page, bc, hash);
+	head = page_pbc(page);
+
+	if (head != NULL) {
+		/* 
+		 * Move the first element to the end of the list.
+		 * List head (pb_head) is set to the next entry.
+		 * Note that this code works even if head is the only element
+		 * on the list (because it's cyclic). 
+		 */
+		page_pbc(page) = next_page_pb(head);
+		PB_SHIFT_INC(head->refcount);
+		shift = PB_SHIFT_GET(head->refcount);
+		/* 
+		 * Update user beancounter, the share of head has been changed.
+		 * Note that the shift counter is taken after increment. 
+		 */
+		dec_held_pages(head->ub, UB_PAGE_WEIGHT >> shift);
+		/* add the new page beancounter to the end of the list */
+		list_add_tail(&p->page_list, &page_pbc(page)->page_list);
+	} else {
+		page_pbc(page) = p;
+		shift = 0;
+		INIT_LIST_HEAD(&p->page_list);
+	}
+
+	p->refcount = PB_REFCOUNT_MAKE(shift, 1);
+	spin_unlock(&pb_lock);
+
+	/* update user beancounter for the new page beancounter */
+	inc_held_pages(bc, UB_PAGE_WEIGHT >> shift);
+	return 0;
+}
+
+void pb_remove_ref(struct page *page, struct user_beancounter *bc)
+{
+	int hash;
+	struct page_beancounter *p, **q;
+	int shift, shiftt;
+
+	if (bc == NULL)
+		return;
+	if (!PageAnon(page) && is_shmem_mapping(page->mapping))
+		return;
+
+	hash = pb_hash(bc, page);
+
+	spin_lock(&pb_lock);
+	if (pb_check_magic(page))
+		goto out_unlock;
+
+	for (q = pb_hash_table + hash, p = *q;
+			p != NULL && (p->page != page || p->ub != bc);
+			q = &p->next_hash, p = *q);
+	if (p == NULL)
+		goto out_unlock;
+
+	PB_COUNT_DEC(p->refcount);
+	if (PB_COUNT_GET(p->refcount))
+		/* 
+		 * More references from the same user beancounter exist.
+		 * Nothing needs to be done. 
+		 */
+		goto out_unlock;
+
+	/* remove from the hash list */
+	*q = p->next_hash;
+
+	shift = PB_SHIFT_GET(p->refcount);
+
+	dec_held_pages(p->ub, UB_PAGE_WEIGHT >> shift);
+
+	if (page_pbc(page) == p) {
+		if (list_empty(&p->page_list))
+			goto out_free;
+		page_pbc(page) = next_page_pb(p);
+	}
+	list_del(&p->page_list);
+	put_beancounter(p->ub);
+	pb_free(&p);
+
+	/* Now balance the list.  Move the tail and adjust its shift counter. */
+	p = prev_page_pb(page_pbc(page));
+	shiftt = PB_SHIFT_GET(p->refcount);
+	page_pbc(page) = p;
+	PB_SHIFT_DEC(p->refcount);
+
+	inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
+
+	/* 
+	 * If the shift counter of the moved beancounter is different from the
+	 * removed one's, repeat the procedure for one more tail beancounter 
+	 */
+	if (shiftt > shift) {
+		p = prev_page_pb(page_pbc(page));
+		page_pbc(page) = p;
+		PB_SHIFT_DEC(p->refcount);
+		inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
+	}
+	spin_unlock(&pb_lock);
+	return;
+
+out_free:
+	page_pbc(page) = NULL;
+	put_beancounter(p->ub);
+	pb_free(&p);
+out_unlock:
+	spin_unlock(&pb_lock);
+	return;
+}
+
+void pb_add_list_ref(struct page *page, struct user_beancounter *src_bc,
+		struct user_beancounter *bc,
+		struct page_beancounter **p_pb)
+{
+	struct page_beancounter *list, *pb;
+
+	pb = *p_pb;
+	if (pb == NULL) {
+		/* Typical case due to caller constraints */
+		if (pb_add_ref(page, bc, &pb)) {
+			printk("BUG: page %p not referenced by pb!\n", page);
+			printk("     srcbc %d, bc %d\n",
+					src_bc ? src_bc->ub_uid : -1, bc->ub_uid);
+			dump_stack();
+		}
+		return;
+	}
+
+	list = pb->next_hash;
+	if (pb_add_ref(page, bc, &pb))
+		BUG();
+	if (pb != NULL) {
+		pb->next_hash = list;
+		list = pb;
+	}
+	*p_pb = list;
+}
+
+struct user_beancounter *pb_grab_page_ub(struct page *page)
+{
+	struct page_beancounter *pb;
+	struct user_beancounter *ub;
+
+	spin_lock(&pb_lock);
+	pb = page_pbc(page);
+	ub = (pb == NULL ? ERR_PTR(-EINVAL) :
+			get_beancounter(pb->ub));
+	spin_unlock(&pb_lock);
+	return ub;
+}
+
+void __init page_beancounters_init(void)
+{
+	unsigned long hash_size;
+
+	pb_cachep = kmem_cache_create("page_beancounter", 
+			sizeof(struct page_beancounter), 0,
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+	hash_size = num_physpages >> 2;
+	for (pb_hash_mask = 1;
+		(hash_size & pb_hash_mask) != hash_size;
+		pb_hash_mask = (pb_hash_mask << 1) + 1);
+	hash_size = pb_hash_mask + 1;
+	printk(KERN_INFO "Page beancounter hash is %lu entries.\n", hash_size);
+	pb_hash_table = vmalloc(hash_size * sizeof(struct page_beancounter *));
+	memset(pb_hash_table, 0, hash_size * sizeof(struct page_beancounter *));
+}
diff -Nurap linux-2.6.9-100.orig/kernel/ub/ub_pages.c linux-2.6.9-ve023stab054/kernel/ub/ub_pages.c
--- linux-2.6.9-100.orig/kernel/ub/ub_pages.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/ub_pages.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,489 @@
+/*
+ *  kernel/ub/ub_pages.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/virtinfo.h>
+#include <linux/module.h>
+
+#include <asm/page.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+
+void fastcall __ub_update_physpages(struct user_beancounter *ub)
+{
+	ub->ub_parms[UB_PHYSPAGES].held = ub->ub_tmpfs_respages
+		+ (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT);
+	ub_adjust_maxheld(ub, UB_PHYSPAGES);
+}
+
+void fastcall __ub_update_oomguarpages(struct user_beancounter *ub)
+{
+	ub->ub_parms[UB_OOMGUARPAGES].held =
+		ub->ub_parms[UB_PHYSPAGES].held + ub->ub_swap_pages;
+	ub_adjust_maxheld(ub, UB_OOMGUARPAGES);
+}
+
+void fastcall __ub_update_privvm(struct user_beancounter *ub)
+{
+	ub->ub_parms[UB_PRIVVMPAGES].held =
+		(ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT)
+		+ ub->ub_unused_privvmpages
+		+ ub->ub_parms[UB_SHMPAGES].held;
+	ub_adjust_maxheld(ub, UB_PRIVVMPAGES);
+}
+
+static inline unsigned long pages_in_pte(pte_t *pte)
+{
+	struct page *pg;
+
+	if (!pte_present(*pte))
+		return 0;
+
+	pg = pte_page(*pte);
+	if (!pfn_valid(page_to_pfn(pg)))
+		return 0;
+	if (PageReserved(pg))
+		return 0;
+	return 1;
+}
+
+static inline unsigned long pages_in_pmd(pmd_t *pmd,
+		unsigned long start, unsigned long end)
+{
+	unsigned long pages, pmd_end, address;
+	pte_t *pte;
+
+	pages = 0;
+	if (pmd_none(*pmd))
+		goto out;
+	if (pmd_bad(*pmd)) {
+		pmd_ERROR(*pmd);
+		pmd_clear(pmd);
+		goto out;
+	}
+
+	pte = pte_offset_map(pmd, start);
+	pmd_end = (start + PMD_SIZE) & PMD_MASK;
+	if (pmd_end && (end > pmd_end))
+		end = pmd_end;
+
+	address = start;
+	do {
+		pages += pages_in_pte(pte);
+		address += PAGE_SIZE;
+		pte++;
+	} while (address && (address < end));
+	pte_unmap(pte-1);	
+out:
+	return pages;
+}
+
+static inline unsigned long pages_in_pgd(pgd_t *pgd, 
+		unsigned long start, unsigned long end)
+{
+	unsigned long pages, pgd_end, address;
+	pmd_t *pmd;
+
+	pages = 0;
+	if (pgd_none(*pgd))
+		goto out;
+	if (pgd_bad(*pgd)) {
+		pgd_ERROR(*pgd);
+		pgd_clear(pgd);
+		goto out;
+	}
+
+	pmd = pmd_offset(pgd, start);
+	pgd_end = (start + PGDIR_SIZE) & PGDIR_MASK;
+	if (pgd_end && (end > pgd_end))
+		end = pgd_end;
+
+	address = start;
+	do {
+		pages += pages_in_pmd(pmd, address, end);
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address && (address < end));
+out:
+	return pages;
+}
+
+/*
+ * Calculate the number of pages present in the address space within a single
+ * vm_area.  mm->page_table_lock must be already held.
+ */
+unsigned long pages_in_vma_range(struct vm_area_struct *vma, 
+		unsigned long start, unsigned long end)
+{
+	unsigned long address, pages;
+	pgd_t *pgd;
+
+	pages = 0;
+	address = start;
+	pgd = pgd_offset(vma->vm_mm, start);
+	do {
+		pages += pages_in_pgd(pgd, address, end);
+		address = (address + PGDIR_SIZE) & PGDIR_MASK;
+		pgd++;
+	} while (address && (address < end));
+
+	return pages;
+}
+
+int ub_unused_privvm_inc(struct user_beancounter *ub, long size,
+		struct vm_area_struct *vma)
+{
+	unsigned long flags;
+
+	if (ub == NULL || !VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
+		return 0;
+
+	for (; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_unused_privvmpages += size;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	return 0;
+}
+
+static void __unused_privvm_dec_locked(struct user_beancounter *ub, 
+		long size)
+{
+	/* catch possible overflow */
+	if (ub->ub_unused_privvmpages < size) {
+		uncharge_warn(ub, UB_UNUSEDPRIVVM,
+				size, ub->ub_unused_privvmpages);
+		size = ub->ub_unused_privvmpages;
+	}
+	ub->ub_unused_privvmpages -= size;
+	__ub_update_privvm(ub);
+}
+
+void __ub_unused_privvm_dec(struct user_beancounter *ub, long size)
+{
+	unsigned long flags;
+
+	if (ub == NULL)
+		return;
+
+	for (; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__unused_privvm_dec_locked(ub, size);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_unused_privvm_dec(struct user_beancounter *ub, long size,
+		struct vm_area_struct *vma)
+{
+	if (VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
+		__ub_unused_privvm_dec(ub, size);
+}
+
+static inline int __charge_privvm_locked(struct user_beancounter *ub, 
+		unsigned long s, enum ub_severity strict)
+{
+	if (__charge_beancounter_locked(ub, UB_PRIVVMPAGES, s, strict) < 0)
+		return -ENOMEM;
+
+	ub->ub_unused_privvmpages += s;
+	return 0;
+}
+
+int ub_privvm_charge(struct user_beancounter *ub, unsigned long vm_flags, 
+		struct file *vm_file, unsigned long size)
+{
+	int retval;
+	unsigned long flags;
+
+	if (ub == NULL || !VM_UB_PRIVATE(vm_flags, vm_file))
+		return 0;
+
+	for (; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	retval = __charge_privvm_locked(ub, size >> PAGE_SHIFT, UB_SOFT);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return retval;
+}
+
+void ub_privvm_uncharge(struct user_beancounter *ub, unsigned long vm_flags,
+		struct file *vm_file, unsigned long size)
+{
+	unsigned long flags;
+
+	if (ub == NULL || !VM_UB_PRIVATE(vm_flags, vm_file))
+		return;
+
+	for (; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__unused_privvm_dec_locked(ub, size >> PAGE_SHIFT);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+int ub_protected_charge(struct user_beancounter *ub, unsigned long size,
+		unsigned long newflags, struct vm_area_struct *vma)
+{
+	unsigned long flags;
+	struct file *file;
+
+	if (ub == NULL)
+		return PRIVVM_NO_CHARGE;
+
+	flags = vma->vm_flags;
+	if (!((newflags ^ flags) & VM_WRITE))
+		return PRIVVM_NO_CHARGE;
+
+	file = vma->vm_file;
+	if (!VM_UB_PRIVATE(newflags | VM_WRITE, file))
+		return PRIVVM_NO_CHARGE;
+
+	if (flags & VM_WRITE)
+		return PRIVVM_TO_SHARED;
+
+	for (; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (__charge_privvm_locked(ub, size, UB_SOFT) < 0)
+		goto err;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return PRIVVM_TO_PRIVATE;
+
+err:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return PRIVVM_ERROR;
+}
+
+int ub_locked_mem_charge(struct user_beancounter *ub, long size)
+{
+	if (ub == NULL)
+		return 0;
+
+	return charge_beancounter(ub, UB_LOCKEDPAGES,
+				size >> PAGE_SHIFT, UB_HARD);
+}
+
+void ub_locked_mem_uncharge(struct user_beancounter *ub, long size)
+{
+	if (ub == NULL)
+		return;
+
+	uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
+}
+
+int ub_shmpages_charge(struct user_beancounter *ub, unsigned long size)
+{
+	int ret;
+	unsigned long flags;
+
+	ret = 0;
+	if (ub == NULL)
+		return 0;
+
+	for (; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ret = __charge_beancounter_locked(ub, UB_SHMPAGES, size, UB_HARD);
+	if (ret == 0)
+		__ub_update_privvm(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return ret;
+}
+
+void ub_shmpages_uncharge(struct user_beancounter *ub, unsigned long size)
+{
+	unsigned long flags;
+
+	if (ub == NULL)
+		return;
+
+	for (; ub->parent != NULL; ub = ub->parent);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_SHMPAGES, size);
+	__ub_update_privvm(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+int ub_memory_charge(struct user_beancounter *ub, unsigned long size,
+		unsigned vm_flags, struct file *vm_file, int sv)
+{
+	struct user_beancounter *ubl;
+	unsigned long flags;
+
+	if (ub == NULL)
+		return 0;
+
+	size >>= PAGE_SHIFT;
+
+	if (size > UB_MAXVALUE)
+		return -EINVAL;
+
+	BUG_ON(sv != UB_SOFT && sv != UB_HARD);
+
+	if ((vm_flags & VM_LOCKED) &&
+	    charge_beancounter(ub, UB_LOCKEDPAGES, size, sv))
+		goto out_err;
+	if (VM_UB_PRIVATE(vm_flags, vm_file)) {
+		for (ubl = ub; ubl->parent != NULL; ubl = ubl->parent);
+		spin_lock_irqsave(&ubl->ub_lock, flags);
+		if (__charge_privvm_locked(ubl, size, sv))
+			goto out_private;
+		spin_unlock_irqrestore(&ubl->ub_lock, flags);
+	}
+	return 0;
+
+out_private:
+	spin_unlock_irqrestore(&ubl->ub_lock, flags);
+	if (vm_flags & VM_LOCKED)
+		uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
+out_err:
+	return -ENOMEM;
+}
+
+void ub_memory_uncharge(struct user_beancounter *ub, unsigned long size,
+		unsigned vm_flags, struct file *vm_file)
+{
+	unsigned long flags;
+
+	if (ub == NULL)
+		return;
+
+	size >>= PAGE_SHIFT;
+
+	if (vm_flags & VM_LOCKED)
+		uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
+	if (VM_UB_PRIVATE(vm_flags, vm_file)) {
+		for (; ub->parent != NULL; ub = ub->parent);
+		spin_lock_irqsave(&ub->ub_lock, flags);
+		__unused_privvm_dec_locked(ub, size);
+		spin_unlock_irqrestore(&ub->ub_lock, flags);
+	}
+}
+
+static inline void do_ub_tmpfs_respages_inc(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_tmpfs_respages += size;
+	__ub_update_physpages(ub);
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_tmpfs_respages_inc(struct user_beancounter *ub,
+		unsigned long size)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_ub_tmpfs_respages_inc(ub, size);
+}
+
+static inline void do_ub_tmpfs_respages_dec(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	/* catch possible overflow */
+	if (ub->ub_tmpfs_respages < size) {
+		uncharge_warn(ub, UB_TMPFSPAGES,
+				size, ub->ub_tmpfs_respages);
+		size = ub->ub_tmpfs_respages;
+	}
+	ub->ub_tmpfs_respages -= size;
+	/* update the values that are the most interesting */
+	__ub_update_physpages(ub);
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_tmpfs_respages_dec(struct user_beancounter *ub,
+		unsigned long size)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_ub_tmpfs_respages_dec(ub, size);
+}
+
+#ifdef CONFIG_USER_SWAP_ACCOUNTING
+static inline void do_ub_swapentry_inc(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_swap_pages++;
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_swapentry_inc(struct user_beancounter *ub)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_ub_swapentry_inc(ub);
+}
+EXPORT_SYMBOL(ub_swapentry_inc);
+
+static inline void do_ub_swapentry_dec(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (ub->ub_swap_pages < 1)
+		uncharge_warn(ub, UB_SWAPPAGES, 1, ub->ub_swap_pages);
+	else
+		ub->ub_swap_pages -= 1;
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_swapentry_dec(struct user_beancounter *ub)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_ub_swapentry_dec(ub);
+}
+#endif
+
+static int vmguar_enough_memory(struct vnotifier_block *self,
+		unsigned long event, void *arg, int old_ret)
+{
+	struct user_beancounter *ub;
+
+	if (event != VIRTINFO_ENOUGHMEM)
+		return old_ret;
+	/*
+	 * If it's a kernel thread, don't care about it.
+	 * Added in order aufsd to run smoothly over ramfs.
+	 */
+	if (current->mm == NULL)
+		return NOTIFY_DONE;
+
+	for (ub = mm_ub(current->mm); ub->parent != NULL; ub = ub->parent);
+	if (ub->ub_parms[UB_PRIVVMPAGES].held >
+			ub->ub_parms[UB_VMGUARPAGES].barrier)
+		return old_ret;
+
+	return NOTIFY_OK;
+}
+
+static struct vnotifier_block vmguar_notifier_block = {
+	.notifier_call = vmguar_enough_memory
+};
+
+static int __init init_vmguar_notifier(void)
+{
+	virtinfo_notifier_register(VITYPE_GENERAL, &vmguar_notifier_block);
+	return 0;
+}
+
+static void __exit fini_vmguar_notifier(void)
+{
+	virtinfo_notifier_unregister(VITYPE_GENERAL, &vmguar_notifier_block);
+}
+
+module_init(init_vmguar_notifier);
+module_exit(fini_vmguar_notifier);
diff -Nurap linux-2.6.9-100.orig/kernel/ub/ub_proc.c linux-2.6.9-ve023stab054/kernel/ub/ub_proc.c
--- linux-2.6.9-100.orig/kernel/ub/ub_proc.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/ub_proc.c	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,380 @@
+/*
+ *  linux/fs/proc/proc_ub.c
+ *
+ *  Copyright (C)  1998-2000  Andrey V. Savochkin <saw@saw.sw.com.sg>
+ *  Copyright (C)  2005       SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * TODO:
+ *
+ * Changes:
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_debug.h>
+
+#include <asm/page.h>
+#include <asm/uaccess.h>
+
+/* 
+ * we have 8 format strings depending on:
+ * 1. BITS_PER_LONG
+ * 2. CONFIG_UBC_KEEP_UNUSED
+ * 3. resource number (see out_proc_beancounter)
+ */
+
+#ifdef CONFIG_UBC_KEEP_UNUSED
+#define REF_FORMAT	"%5.5s %4i: %-12s "
+#define UID_HEAD_STR	"uid ref"
+#else
+#define REF_FORMAT	"%10.10s: %-12s "
+#define UID_HEAD_STR	"uid"
+#endif
+#define REF2_FORMAT	"%10s  %-12s "
+
+#if BITS_PER_LONG == 32
+#define RES_FORMAT	"%10lu %10lu %10lu %10lu %10lu"
+#define HEAD_FORMAT	"%10s %10s %10s %10s %10s"
+#define UB_PROC_LINE_TEXT	(10+2+12+1+10+1+10+1+10+1+10+1+10)
+#else
+#define RES_FORMAT	"%20lu %20lu %20lu %20lu %20lu"
+#define HEAD_FORMAT	"%20s %20s %20s %20s %20s"
+#define UB_PROC_LINE_TEXT	(10+2+12+1+20+1+20+1+20+1+20+1+20)
+#endif
+
+#define UB_PROC_LINE_LEN	(UB_PROC_LINE_TEXT + 1)
+
+static void out_proc_version(char *buf)
+{
+	int len;
+
+	len = sprintf(buf, "Version: 2.5");
+	memset(buf + len, ' ', UB_PROC_LINE_TEXT - len);
+	buf[UB_PROC_LINE_TEXT] = '\n';
+}
+
+static void out_proc_head(char *buf)
+{
+	sprintf(buf, REF2_FORMAT HEAD_FORMAT,
+			UID_HEAD_STR, "resource", "held", "maxheld", 
+			"barrier", "limit", "failcnt");
+	buf[UB_PROC_LINE_TEXT] = '\n';
+}
+
+static void out_proc_beancounter(char *buf, struct user_beancounter *ub, int r)
+{
+	if (r == 0) {
+		char tmpbuf[64];
+		print_ub_uid(ub, tmpbuf, sizeof(tmpbuf));
+		sprintf(buf, REF_FORMAT RES_FORMAT, 
+			tmpbuf, 
+#ifdef CONFIG_UBC_KEEP_UNUSED
+			atomic_read(&ub->ub_refcount),
+#endif
+			ub_rnames[r], ub->ub_parms[r].held, 
+			ub->ub_parms[r].maxheld, ub->ub_parms[r].barrier, 
+			ub->ub_parms[r].limit, ub->ub_parms[r].failcnt);
+	} else
+		sprintf(buf, REF2_FORMAT RES_FORMAT, 
+			"", ub_rnames[r],
+			ub->ub_parms[r].held, ub->ub_parms[r].maxheld,
+			ub->ub_parms[r].barrier, ub->ub_parms[r].limit,
+			ub->ub_parms[r].failcnt);
+
+	buf[UB_PROC_LINE_TEXT] = '\n';
+}
+
+static int ub_accessible(struct user_beancounter *ub,
+		struct user_beancounter *exec_ub,
+		struct file *file)
+{
+	struct user_beancounter *p, *q;
+
+	for (p = exec_ub; p->parent != NULL; p = p->parent);
+	for (q = ub; q->parent != NULL; q = q->parent);
+	if (p != get_ub0() && q != p)
+		return 0;
+	if (ub->parent == NULL)
+		return 1;
+	return file->private_data == NULL ? 0 : 1;
+}
+
+static ssize_t ub_proc_read(struct file *file, char *usrbuf, size_t len,
+		loff_t *poff)
+{
+	ssize_t retval;
+	char *buf;
+	unsigned long flags;
+	int i, resource;
+	struct ub_hash_slot *slot;
+	struct user_beancounter *ub;
+	struct user_beancounter *exec_ub = get_exec_ub();
+	loff_t n, off;
+	int rem, produced, job, tocopy;
+	const int is_capable =
+		(capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH));
+
+	retval = -ENOBUFS;
+	buf = (char *)__get_free_page(GFP_KERNEL);
+	if (buf == NULL)
+		goto out;
+
+	retval = 0;
+	if (!is_capable)
+		goto out_free;
+
+	off = *poff;
+	if (off < 0) /* can't happen, just in case */
+		goto inval;
+
+again:
+	i = 0;
+	slot = ub_hash;
+	n = off; /* The amount of data to skip */
+	produced = 0;
+	if (n < (UB_PROC_LINE_LEN * 2)) {
+		if (n < UB_PROC_LINE_LEN) {
+			out_proc_version(buf);
+			produced += UB_PROC_LINE_LEN;
+			n += UB_PROC_LINE_LEN;
+		}
+		out_proc_head(buf + produced);
+		produced += UB_PROC_LINE_LEN;
+		n += UB_PROC_LINE_LEN;
+	}
+	n -= (2 * UB_PROC_LINE_LEN);
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	while (1) {
+		for (ub = slot->ubh_beans; 
+		     ub != NULL && n >= (UB_RESOURCES * UB_PROC_LINE_LEN);
+		     ub = ub->ub_next)
+			if (is_capable && ub_accessible(ub, exec_ub, file))
+				n -= (UB_RESOURCES * UB_PROC_LINE_LEN);
+		if (ub != NULL || ++i >= UB_HASH_SIZE)
+			break;
+		++slot;
+	}
+	rem = n; /* the amount of the data in the buffer to skip */
+	job = PAGE_SIZE - UB_PROC_LINE_LEN + 1; /* end of buffer data */
+	if (len < job - rem)
+		job = rem + len;
+	while (ub != NULL && produced < job) {
+		if (is_capable && ub_accessible(ub, exec_ub, file))
+			for (resource = 0;
+				produced < job && resource < UB_RESOURCES;
+				resource++, produced += UB_PROC_LINE_LEN)
+			{
+				out_proc_beancounter(buf + produced,
+						ub, resource);
+			}
+		if (produced >= job)
+			break;
+		/* Find the next beancounter to produce more data. */
+		ub = ub->ub_next;
+		while (ub == NULL && ++i < UB_HASH_SIZE) {
+			++slot;
+			ub = slot->ubh_beans;
+		}
+	}
+
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+	ub_debug(UBD_ALLOC, KERN_DEBUG "UB_PROC: produced %d, job %d, rem %d\n",
+			produced, job, rem);
+
+	/* 
+	 * Temporary buffer `buf' contains `produced' bytes.
+	 * Extract no more than `len' bytes at offset `rem'.
+	 */
+	if (produced <= rem)
+		goto out_free;
+	tocopy = produced - rem;
+	if (len < tocopy)
+		tocopy = len;
+	if (!tocopy)
+		goto out_free;
+	if (copy_to_user(usrbuf, buf + rem, tocopy))
+		goto fault;
+	off += tocopy; /* can't overflow */
+	*poff = off;
+	len -= tocopy;
+	retval += tocopy;
+	if (!len)
+		goto out_free;
+	usrbuf += tocopy;
+	goto again;
+
+fault:
+	retval = -EFAULT;
+out_free:
+	free_page((unsigned long)buf);
+out:
+	return retval;
+
+inval:
+	retval = -EINVAL;
+	goto out_free;
+}
+
+static int ub_proc_open(struct inode *inode, struct file *file)
+{
+	file->private_data = strcmp(file->f_dentry->d_name.name,
+						"user_beancounters") ?
+						(void *)-1 : NULL;
+	return 0;
+}
+
+static struct file_operations ub_file_operations = {
+	.read = &ub_proc_read,
+	.open = &ub_proc_open
+};
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+#include <linux/seq_file.h>
+#include <linux/kmem_cache.h>
+
+static void *ubd_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+	struct user_beancounter *ub;
+	long slot;
+
+	spin_lock_irq(&ub_hash_lock);
+	for (slot = 0; slot < UB_HASH_SIZE; slot++)
+		for (ub = ub_hash[slot].ubh_beans; ub; ub = ub->ub_next) {
+			if (n == 0) {
+				m->private = (void *)slot;
+				return (void *)ub;
+			}
+			n--;
+		}
+	return NULL;
+}
+
+static void *ubd_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	struct user_beancounter *ub;
+	long slot;
+
+	ub = (struct user_beancounter *)p;
+	slot = (long)m->private;
+
+	++*pos;
+	ub = ub->ub_next;
+	while (1) {
+		for (; ub; ub = ub->ub_next) {
+			m->private = (void *)slot;
+			return (void *)ub;
+		}
+		slot++;
+		if (slot == UB_HASH_SIZE)
+			break;
+		ub = ub_hash[slot].ubh_beans;
+	}
+	return NULL;
+}
+
+static void ubd_stop(struct seq_file *m, void *p)
+{
+	spin_unlock_irq(&ub_hash_lock);
+}
+
+#define PROC_LINE_FMT	"\t%-17s\t%5lu\t%5lu\n"
+
+static int ubd_show(struct seq_file *m, void *p)
+{
+	struct user_beancounter *ub;
+	struct ub_cache_counter *cc;
+	long pages, vmpages;
+	int i;
+	char id[64];
+
+	ub = (struct user_beancounter *)p;
+	print_ub_uid(ub, id, sizeof(id));
+	seq_printf(m, "%s:\n", id);
+
+	pages = vmpages = 0;
+	for (i = 0; i < NR_CPUS; i++) {
+		pages += ub->ub_pages_charged[i];
+		vmpages += ub->ub_vmalloc_charged[i];
+	}
+	if (pages < 0)
+		pages = 0;
+	if (vmpages < 0)
+		vmpages = 0;
+	seq_printf(m, PROC_LINE_FMT, "pages", pages, PAGE_SIZE);
+	seq_printf(m, PROC_LINE_FMT, "vmalloced", vmpages, PAGE_SIZE);
+
+	seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_UNUSEDPRIVVM],
+			ub->ub_unused_privvmpages, PAGE_SIZE);
+	seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_TMPFSPAGES],
+			ub->ub_tmpfs_respages, PAGE_SIZE);
+	seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_SWAPPAGES],
+			ub->ub_swap_pages, PAGE_SIZE);
+	/* interrupts are disabled by locking ub_hash_lock */
+	spin_lock(&cc_lock);
+	list_for_each_entry (cc, &ub->ub_cclist, ulist) {
+		kmem_cache_t *cachep;
+
+		cachep = cc->cachep;
+		seq_printf(m, PROC_LINE_FMT,
+				cachep->name,
+				cc->counter,
+				(unsigned long)cachep->objuse);
+	}
+	spin_unlock(&cc_lock);
+	return 0;
+}
+
+static struct seq_operations kmemdebug_op = {
+	.start	= ubd_start,
+	.next	= ubd_next,
+	.stop	= ubd_stop,
+	.show	= ubd_show,
+};
+
+static int kmem_debug_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &kmemdebug_op);
+}
+
+static struct file_operations kmem_debug_ops = {
+	.open		= kmem_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+#endif
+
+void __init beancounter_proc_init(void)
+{
+	struct proc_dir_entry *entry;
+
+	entry = create_proc_entry("user_beancounters", S_IRUGO, NULL);
+	if (entry)
+		entry->proc_fops = &ub_file_operations;
+	else
+		panic("Can't create /proc/user_beancounters entry!\n");
+
+	entry = create_proc_entry("user_beancounters_sub", S_IRUGO, NULL);
+	if (entry)
+		entry->proc_fops = &ub_file_operations;
+	else
+		panic("Can't create /proc/user_beancounters_sub entry!\n");
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	entry = create_proc_entry("user_beancounters_debug", S_IRUGO, NULL);
+	if (entry)
+		entry->proc_fops = &kmem_debug_ops;
+	else
+		panic("Can't create /proc/user_beancounters_debug entry!\n");
+#endif
+}
diff -Nurap linux-2.6.9-100.orig/kernel/ub/ub_stat.c linux-2.6.9-ve023stab054/kernel/ub/ub_stat.c
--- linux-2.6.9-100.orig/kernel/ub/ub_stat.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/ub_stat.c	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,465 @@
+/*
+ *  kernel/ub/ub_stat.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/timer.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/suspend.h>
+
+#include <asm/uaccess.h>
+#include <asm/param.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_stat.h>
+
+static spinlock_t ubs_notify_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(ubs_notify_list);
+static long ubs_min_interval;
+static ubstattime_t ubs_start_time, ubs_end_time;
+static struct timer_list ubs_timer;
+
+static int ubstat_get_list(void *buf, long size)
+{
+	int retval;
+	unsigned long flags;
+	int slotnr;
+	struct ub_hash_slot *slot;
+	struct user_beancounter *ub, *last_ub;
+	long *page, *ptr, *end;
+	int len;
+
+	page = (long *)__get_free_page(GFP_KERNEL);
+	if (page == NULL)
+		return -ENOMEM;
+
+	retval = 0;
+	slotnr = 0;
+	slot = ub_hash;
+	last_ub = NULL;
+	while (1) {
+		ptr = page;
+		end = page + PAGE_SIZE / sizeof(*ptr);
+
+		spin_lock_irqsave(&ub_hash_lock, flags);
+		if (last_ub == NULL)
+			ub = slot->ubh_beans;
+		else
+			ub = last_ub->ub_next;
+		while (1) {
+			for (; ub != NULL; ub = ub->ub_next) {
+				if (ub->parent != NULL)
+					continue;
+				*ptr++ = ub->ub_uid;
+				if (ptr == end)
+					break;
+			}
+			if (ptr == end)
+				break;
+			++slot;
+			if (++slotnr >= UB_HASH_SIZE)
+				break;
+			ub = slot->ubh_beans;
+		}
+		if (ptr == page)
+			goto out_unlock;
+		if (ub != NULL)
+			get_beancounter(ub);
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+		if (last_ub != NULL)
+			put_beancounter(last_ub);
+		last_ub = ub; /* last visited beancounter in the slot */
+
+		len = min_t(long, (ptr - page) * sizeof(*ptr), size);
+		if (copy_to_user(buf, page, len)) {
+			retval = -EFAULT;
+			break;
+		}
+		retval += len;
+		if (len < PAGE_SIZE)
+			break;
+		buf += len;
+		size -= len;
+	}
+out:
+	if (last_ub != NULL)
+		put_beancounter(last_ub);
+	free_page((unsigned long)page);
+	return retval;
+
+out_unlock:
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+	goto out;
+}
+
+static int ubstat_gettime(void *buf, long size)
+{
+	ubgettime_t data;
+	int retval;
+
+	spin_lock(&ubs_notify_lock);
+	data.start_time = ubs_start_time;
+	data.end_time = ubs_end_time;
+	data.cur_time = ubs_start_time + (jiffies - ubs_start_time * HZ) / HZ;
+	spin_unlock(&ubs_notify_lock);
+
+	retval = min_t(long, sizeof(data), size);
+	if (copy_to_user(buf, &data, retval))
+		retval = -EFAULT;
+	return retval;
+}
+
+static int ubstat_do_read_one(struct user_beancounter *ub, int res, void *kbuf)
+{
+	struct {
+		ubstattime_t	start_time;
+		ubstattime_t	end_time;
+		ubstatparm_t	param[1];
+	} *data;
+
+	data = kbuf;
+	data->start_time = ubs_start_time;
+	data->end_time = ubs_end_time;
+
+	data->param[0].maxheld = ub->ub_store[res].maxheld;
+	data->param[0].failcnt = ub->ub_store[res].failcnt;
+
+	return sizeof(*data);
+}
+
+static int ubstat_do_read_all(struct user_beancounter *ub, void *kbuf, int size)
+{
+	int wrote;
+	struct {
+		ubstattime_t	start_time;
+		ubstattime_t	end_time;
+		ubstatparm_t	param[UB_RESOURCES];
+	} *data;
+	int resource;
+
+	data = kbuf;
+	data->start_time = ubs_start_time;
+	data->end_time = ubs_end_time;
+	wrote = sizeof(data->start_time) + sizeof(data->end_time);
+
+	for (resource = 0; resource < UB_RESOURCES; resource++) {
+		if (size < wrote + sizeof(data->param[resource]))
+			break;
+		data->param[resource].maxheld = ub->ub_store[resource].maxheld;
+		data->param[resource].failcnt = ub->ub_store[resource].failcnt;
+		wrote += sizeof(data->param[resource]); 
+	}
+
+	return wrote;
+}
+
+static int ubstat_do_read_full(struct user_beancounter *ub, void *kbuf,
+		int size)
+{
+	int wrote;
+	struct {
+		ubstattime_t	start_time;
+		ubstattime_t	end_time;
+		ubstatparmf_t	param[UB_RESOURCES];
+	} *data;
+	int resource;
+
+	data = kbuf;
+	data->start_time = ubs_start_time;
+	data->end_time = ubs_end_time;
+	wrote = sizeof(data->start_time) + sizeof(data->end_time);
+
+	for (resource = 0; resource < UB_RESOURCES; resource++) {
+		if (size < wrote + sizeof(data->param[resource]))
+			break;
+		/* The beginning of ubstatparmf_t matches struct ubparm. */
+		memcpy(&data->param[resource], &ub->ub_store[resource],
+				sizeof(ub->ub_store[resource]));
+		data->param[resource].__unused1 = 0;
+		data->param[resource].__unused2 = 0;
+		wrote += sizeof(data->param[resource]);
+	}
+	return wrote;
+}
+
+static int ubstat_get_stat(struct user_beancounter *ub, long cmd,
+		void *buf, long size)
+{
+	void *kbuf;
+	int retval;
+
+	kbuf = (void *)__get_free_page(GFP_KERNEL);
+	if (kbuf == NULL)
+		return -ENOMEM;
+
+	spin_lock(&ubs_notify_lock);
+	switch (UBSTAT_CMD(cmd)) {
+		case UBSTAT_READ_ONE:
+			retval = -EINVAL;
+			if (UBSTAT_PARMID(cmd) >= UB_RESOURCES)
+				break;
+			retval = ubstat_do_read_one(ub,
+					UBSTAT_PARMID(cmd), kbuf);
+			break;
+		case UBSTAT_READ_ALL:
+			retval = ubstat_do_read_all(ub, kbuf, PAGE_SIZE);
+			break;
+		case UBSTAT_READ_FULL:
+			retval = ubstat_do_read_full(ub, kbuf, PAGE_SIZE);
+			break;
+		default:
+			retval = -EINVAL;
+	}
+	spin_unlock(&ubs_notify_lock);
+
+	if (retval > 0) {
+		retval = min_t(long, retval, size);
+		if (copy_to_user(buf, kbuf, retval))
+			retval = -EFAULT;
+	}
+
+	free_page((unsigned long)kbuf);
+	return retval;
+}
+
+static int ubstat_handle_notifrq(ubnotifrq_t *req)
+{
+	int retval;
+	struct ub_stat_notify *new_notify;
+	struct list_head *entry;
+	struct task_struct *tsk_to_free;
+
+	new_notify = kmalloc(sizeof(*new_notify), GFP_KERNEL);
+	if (new_notify == NULL)
+		return -ENOMEM;
+
+	tsk_to_free = NULL;
+	INIT_LIST_HEAD(&new_notify->list);
+
+	spin_lock(&ubs_notify_lock);
+	list_for_each(entry, &ubs_notify_list) {
+		struct ub_stat_notify *notify;
+
+		notify = list_entry(entry, struct ub_stat_notify, list);
+		if (notify->task == current) {
+			kfree(new_notify);
+			new_notify = notify;
+			break;
+		}
+	}
+
+	retval = -EINVAL;
+	if (req->maxinterval < 1)
+		goto out_unlock;
+	if (req->maxinterval > TIME_MAX_SEC)
+		req->maxinterval = TIME_MAX_SEC;
+	if (req->maxinterval < ubs_min_interval) {
+		unsigned long dif;
+
+		ubs_min_interval = req->maxinterval;
+		dif = (ubs_timer.expires - jiffies + HZ - 1) / HZ;
+		if (dif > req->maxinterval)
+			mod_timer(&ubs_timer,
+					ubs_timer.expires -
+					(dif - req->maxinterval) * HZ);
+	}
+
+	if (entry != &ubs_notify_list) {
+		list_del(&new_notify->list);
+		tsk_to_free = new_notify->task;
+	}
+	if (req->signum) {
+		new_notify->task = current;
+		get_task_struct(new_notify->task);
+		new_notify->signum = req->signum;
+		list_add(&new_notify->list, &ubs_notify_list);
+	} else
+		kfree(new_notify);
+	retval = 0;
+out_unlock:
+	spin_unlock(&ubs_notify_lock);
+	if (tsk_to_free != NULL)
+		put_task_struct(tsk_to_free);
+	return retval;
+}
+
+/*
+ * former sys_ubstat
+ */
+long do_ubstat(int func, unsigned long arg1, unsigned long arg2, void *buf, 
+		long size)
+{
+	int retval;
+	struct user_beancounter *ub;
+
+	if (func == UBSTAT_UBPARMNUM)
+		return UB_RESOURCES;
+	if (func == UBSTAT_UBLIST)
+		return ubstat_get_list(buf, size);
+	if (!(capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)))
+		return -EPERM;
+
+	if (func == UBSTAT_GETTIME) {
+		retval = ubstat_gettime(buf, size);
+		goto notify;
+	}
+
+	ub = get_exec_ub();
+	if (ub != NULL && ub->ub_uid == arg1)
+		get_beancounter(ub);
+	else /* FIXME must be if (ve_is_super) */
+		ub = get_beancounter_byuid(arg1, 0);
+
+	if (ub == NULL)
+		return -ESRCH;
+
+	retval = ubstat_get_stat(ub, func, buf, size);
+	put_beancounter(ub);
+notify:
+	/* Handle request for notification */
+	if (retval >= 0) {
+		ubnotifrq_t notifrq;
+		int err;
+
+		err = -EFAULT;
+		if (!copy_from_user(&notifrq, (void *)arg2, sizeof(notifrq)))
+			err = ubstat_handle_notifrq(&notifrq);
+		if (err)
+			retval = err;
+	}
+
+	return retval;
+}
+
+static void ubstat_save_onestat(struct user_beancounter *ub)
+{
+	int resource;
+
+	/* called with local irq disabled */
+	spin_lock(&ub->ub_lock);
+	for (resource = 0; resource < UB_RESOURCES; resource++) {
+		memcpy(&ub->ub_store[resource], &ub->ub_parms[resource],
+			sizeof(struct ubparm));
+		ub->ub_parms[resource].minheld = 
+			ub->ub_parms[resource].maxheld =
+			ub->ub_parms[resource].held;
+	}
+	spin_unlock(&ub->ub_lock);
+}
+
+static void ubstat_save_statistics(void)
+{
+	unsigned long flags;
+	int i;
+	struct user_beancounter *ub;
+
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	for_each_beancounter(i, ub)
+			ubstat_save_onestat(ub);
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+}
+
+static void ubstatd_timeout(unsigned long __data)
+{
+	struct task_struct *p;
+
+	p = (struct task_struct *) __data;
+	wake_up_process(p);
+}
+
+/*
+ * Safe wrapper for send_sig. It prevents a race with release_task
+ * for sighand.
+ * Should be called under tasklist_lock.
+ */
+static void task_send_sig(struct ub_stat_notify *notify)
+{
+	if (likely(notify->task->sighand != NULL))
+		send_sig(notify->signum, notify->task, 1);
+}
+
+static inline void do_notifies(void)
+{
+	LIST_HEAD(notif_free_list);
+	struct ub_stat_notify *notify;
+	struct ub_stat_notify *tmp;
+
+	spin_lock(&ubs_notify_lock);
+	ubs_start_time = ubs_end_time;
+	/*
+	 * the expression below relies on time being unsigned long and
+	 * arithmetic promotion rules
+	 */
+	ubs_end_time += (ubs_timer.expires - ubs_start_time * HZ) / HZ;
+	mod_timer(&ubs_timer, ubs_timer.expires + ubs_min_interval * HZ);
+	ubs_min_interval = TIME_MAX_SEC;
+	/* save statistics accumulated for the interval */
+	ubstat_save_statistics();
+	/* send signals */
+	read_lock(&tasklist_lock);
+	while (!list_empty(&ubs_notify_list)) {
+		notify = list_entry(ubs_notify_list.next,
+				struct ub_stat_notify, list);
+		task_send_sig(notify);
+		list_del(&notify->list);
+		list_add(&notify->list, &notif_free_list);
+	}
+	read_unlock(&tasklist_lock);
+	spin_unlock(&ubs_notify_lock);
+
+	list_for_each_entry_safe(notify, tmp, &notif_free_list, list) {
+		put_task_struct(notify->task);
+		kfree(notify);
+	}
+}
+
+/*
+ * Kernel thread
+ */
+static int ubstatd(void *unused)
+{
+	/* daemonize call will take care of signals */
+	daemonize("ubstatd");
+
+	ubs_timer.data = (unsigned long)current;
+	ubs_timer.function = ubstatd_timeout;
+	add_timer(&ubs_timer);
+
+	while (1) {
+		set_task_state(current, TASK_INTERRUPTIBLE);
+		if (time_after(ubs_timer.expires, jiffies)) {
+			schedule();
+			if (test_thread_flag(TIF_FREEZE))
+				refrigerator();
+			continue;
+		}
+
+		__set_task_state(current, TASK_RUNNING);
+		do_notifies();
+	}
+}
+
+static int __init ubstatd_init(void)
+{
+	init_timer(&ubs_timer);
+	ubs_timer.expires = TIME_MAX_JIF;
+	ubs_min_interval = TIME_MAX_SEC;
+	ubs_start_time = ubs_end_time = 0;
+
+	kernel_thread(ubstatd, NULL, 0);
+	return 0;
+}
+
+module_init(ubstatd_init);
diff -Nurap linux-2.6.9-100.orig/kernel/ub/ub_sys.c linux-2.6.9-ve023stab054/kernel/ub/ub_sys.c
--- linux-2.6.9-100.orig/kernel/ub/ub_sys.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ub/ub_sys.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,170 @@
+/*
+ *  kernel/ub/ub_sys.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/virtinfo.h>
+#include <asm/uaccess.h>
+
+#include <ub/beancounter.h>
+
+#ifndef CONFIG_USER_RESOURCE
+asmlinkage long sys_getluid(void)
+{
+	return -ENOSYS;
+}
+
+asmlinkage long sys_setluid(uid_t uid)
+{
+	return -ENOSYS;
+}
+
+asmlinkage long sys_setublimit(uid_t uid, unsigned long resource, 
+		unsigned long *limits)
+{
+	return -ENOSYS;
+}
+
+asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2, 
+		void *buf, long size)
+{
+	return -ENOSYS;
+}
+#else /* CONFIG_USER_RESOURCE */
+
+/*
+ *	The (rather boring) getluid syscall
+ */
+asmlinkage long sys_getluid(void)
+{
+	struct user_beancounter *ub;
+
+	ub = get_exec_ub();
+	if (ub == NULL)
+		return -EINVAL;
+
+	return ub->ub_uid;
+}
+
+/*
+ *	The setluid syscall
+ */
+asmlinkage long sys_setluid(uid_t uid)
+{
+	struct user_beancounter *ub;
+	struct task_beancounter *task_bc;
+	int error;
+
+	task_bc = task_bc(current);
+
+	/* You may not disown a setluid */
+	error = -EINVAL;
+	if (uid == (uid_t)-1)
+		goto out;
+
+	/* You may only set an ub as root */
+	error = -EPERM;
+	if (!capable(CAP_SETUID))
+		goto out;
+
+	/* 
+	 * The ub once set is irrevocable to all
+	 * unless it's set from ve0.
+	 */
+	if (!ve_is_super(get_exec_env()))
+		goto out;
+
+	/* Ok - set up a beancounter entry for this user */
+	error = -ENOBUFS;
+	ub = get_beancounter_byuid(uid, 1);
+	if (ub == NULL)
+		goto out;
+
+	ub_debug(UBD_ALLOC | UBD_LIMIT, "setluid, bean %p (count %d) "
+			"for %.20s pid %d\n",
+			ub, atomic_read(&ub->ub_refcount),
+			current->comm, current->pid);
+	/* install bc */
+	error = virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_NEWUBC, ub);
+	if (!(error & NOTIFY_FAIL)) {
+		put_beancounter(task_bc->exec_ub);
+		task_bc->exec_ub = ub;
+		if (!(error & NOTIFY_OK)) {
+			put_beancounter(task_bc->fork_sub);
+			task_bc->fork_sub = get_beancounter(ub);
+		}
+		error = 0;
+	} else {
+		put_beancounter(ub);
+		error = -ENOBUFS;
+	}
+out:
+	return error;
+}
+
+/*
+ *	The setbeanlimit syscall
+ */
+asmlinkage long sys_setublimit(uid_t uid, unsigned long resource,
+		unsigned long *limits)
+{
+	int error;
+	unsigned long flags;
+	struct user_beancounter *ub;
+	unsigned long new_limits[2];
+
+	error = -EPERM;
+	if(!capable(CAP_SYS_RESOURCE))
+		goto out;
+
+	if (!ve_is_super(get_exec_env()))
+		goto out;
+
+	error = -EINVAL;
+	if (resource >= UB_RESOURCES)
+		goto out;
+
+	error = -EFAULT;
+	if (copy_from_user(&new_limits, limits, sizeof(new_limits)))
+		goto out;
+
+	error = -EINVAL;
+	if (new_limits[0] > UB_MAXVALUE || new_limits[1] > UB_MAXVALUE)
+		goto out;
+
+	error = -ENOENT;
+	ub = get_beancounter_byuid(uid, 0);
+	if (ub == NULL) {
+		ub_debug(UBD_LIMIT, "No login bc for uid %d\n", uid);
+		goto out;
+	}
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_parms[resource].barrier = new_limits[0];
+	ub->ub_parms[resource].limit = new_limits[1];
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	put_beancounter(ub);
+
+	error = 0;
+out:
+	return error;
+}
+
+extern long do_ubstat(int func, unsigned long arg1, unsigned long arg2, 
+		void *buf, long size);
+asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2, 
+		void *buf, long size)
+{
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
+	return do_ubstat(func, arg1, arg2, buf, size);
+}
+#endif
diff -Nurap linux-2.6.9-100.orig/kernel/user.c linux-2.6.9-ve023stab054/kernel/user.c
--- linux-2.6.9-100.orig/kernel/user.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/kernel/user.c	2011-06-15 19:26:21.000000000 +0400
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/bitops.h>
 #include <linux/key.h>
+#include <linux/module.h>
 
 /*
  * UID task count cache, to get fast user lookup in "alloc_uid"
@@ -22,7 +23,20 @@
 #define UIDHASH_SZ		(1 << UIDHASH_BITS)
 #define UIDHASH_MASK		(UIDHASH_SZ - 1)
 #define __uidhashfn(uid)	(((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
-#define uidhashentry(uid)	(uidhash_table + __uidhashfn((uid)))
+#define __uidhashentry(uid)	(uidhash_table + __uidhashfn((uid)))
+
+#ifdef CONFIG_VE
+#define UIDHASH_MASK_VE			(UIDHASH_SZ_VE - 1)
+#define __uidhashfn_ve(uid)		(((uid >> UIDHASH_BITS_VE) ^ uid) & \
+						UIDHASH_MASK_VE)
+#define __uidhashentry_ve(uid, envid)	((envid)->uidhash_table + \
+						__uidhashfn_ve(uid))
+#define uidhashentry_ve(uid)		(ve_is_super(get_exec_env()) ?	\
+						__uidhashentry(uid) :	\
+						__uidhashentry_ve(uid, get_exec_env()))
+#else
+#define uidhashentry_ve(uid)		__uidhashentry(uid)
+#endif
 
 static kmem_cache_t *uid_cachep;
 static struct list_head uidhash_table[UIDHASH_SZ];
@@ -83,7 +97,7 @@ struct user_struct *find_user(uid_t uid)
 	struct user_struct *ret;
 
 	spin_lock_irq(&uidhash_lock);
-	ret = uid_hash_find(uid, uidhashentry(uid));
+	ret = uid_hash_find(uid, uidhashentry_ve(uid));
 	spin_unlock_irq(&uidhash_lock);
 	return ret;
 }
@@ -105,10 +119,11 @@ void free_uid(struct user_struct *up)
 	}
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL(free_uid);
 
 struct user_struct * alloc_uid(uid_t uid)
 {
-	struct list_head *hashent = uidhashentry(uid);
+	struct list_head *hashent = uidhashentry_ve(uid);
 	struct user_struct *up;
 
 	spin_lock_irq(&uidhash_lock);
@@ -154,6 +169,7 @@ struct user_struct * alloc_uid(uid_t uid
 	}
 	return up;
 }
+EXPORT_SYMBOL(alloc_uid);
 
 void switch_uid(struct user_struct *new_user)
 {
@@ -182,6 +198,7 @@ void switch_uid(struct user_struct *new_
 	free_uid(old_user);
 	suid_keys(current);
 }
+EXPORT_SYMBOL(switch_uid);
 
 
 static int __init uid_cache_init(void)
@@ -189,14 +206,14 @@ static int __init uid_cache_init(void)
 	int n;
 
 	uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
-			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 
 	for(n = 0; n < UIDHASH_SZ; ++n)
 		INIT_LIST_HEAD(uidhash_table + n);
 
 	/* Insert the root user immediately (init already runs as root) */
 	spin_lock_irq(&uidhash_lock);
-	uid_hash_insert(&root_user, uidhashentry(0));
+	uid_hash_insert(&root_user, __uidhashentry(0));
 	spin_unlock_irq(&uidhash_lock);
 
 	return 0;
diff -Nurap linux-2.6.9-100.orig/kernel/ve.c linux-2.6.9-ve023stab054/kernel/ve.c
--- linux-2.6.9-100.orig/kernel/ve.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/ve.c	2011-06-15 19:26:21.000000000 +0400
@@ -0,0 +1,187 @@
+/*
+ *  linux/kernel/ve.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * 've.c' helper file performing VE sub-system initialization
+ */
+
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/capability.h>
+#include <linux/ve.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/sys.h>
+#include <linux/kdev_t.h>
+#include <linux/termios.h>
+#include <linux/tty_driver.h>
+#include <linux/netdevice.h>
+#include <linux/utsname.h>
+#include <linux/proc_fs.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <linux/ve_proto.h>
+#include <linux/ve_owner.h>
+
+#include <linux/nfcalls.h>
+
+unsigned long vz_rstamp = 0x37e0f59d;
+EXPORT_SYMBOL(vz_rstamp);
+
+#ifdef CONFIG_MODULES
+struct module no_module = { .state = MODULE_STATE_GOING };
+EXPORT_SYMBOL(no_module);
+#endif
+
+#ifdef CONFIG_VE
+
+DCL_VE_OWNER(SKB, SLAB, struct sk_buff, owner_env, , (noinline, regparm(1)))
+DCL_VE_OWNER(SK, SLAB, struct sock, sk_owner_env, , (noinline, regparm(1)))
+DCL_VE_OWNER(TW, SLAB, struct tcp_tw_bucket, tw_owner_env, , (noinline, regparm(1)))
+DCL_VE_OWNER(FILP, GENERIC, struct file, owner_env, inline, (always_inline))
+DCL_VE_OWNER(FSTYPE, MODULE, struct file_system_type, owner_env, , ())
+
+#if defined(CONFIG_VE_IPTABLES)
+INIT_KSYM_MODULE(ip_tables);
+INIT_KSYM_MODULE(iptable_filter);
+INIT_KSYM_MODULE(iptable_mangle);
+INIT_KSYM_MODULE(ipt_limit);
+INIT_KSYM_MODULE(ipt_multiport);
+INIT_KSYM_MODULE(ipt_tos);
+INIT_KSYM_MODULE(ipt_TOS);
+INIT_KSYM_MODULE(ipt_REJECT);
+INIT_KSYM_MODULE(ipt_TCPMSS);
+INIT_KSYM_MODULE(ipt_tcpmss);
+INIT_KSYM_MODULE(ipt_ttl);
+INIT_KSYM_MODULE(ipt_LOG);
+INIT_KSYM_MODULE(ipt_length);
+INIT_KSYM_MODULE(ip_conntrack);
+INIT_KSYM_MODULE(ip_conntrack_ftp);
+INIT_KSYM_MODULE(ip_conntrack_irc);
+INIT_KSYM_MODULE(ipt_conntrack);
+INIT_KSYM_MODULE(ipt_state);
+INIT_KSYM_MODULE(ipt_helper);
+INIT_KSYM_MODULE(iptable_nat);
+INIT_KSYM_MODULE(ip_nat_ftp);
+INIT_KSYM_MODULE(ip_nat_irc);
+INIT_KSYM_MODULE(ipt_REDIRECT);
+INIT_KSYM_MODULE(ipt_owner);
+
+INIT_KSYM_CALL(int, init_netfilter, (void));
+INIT_KSYM_CALL(int, init_iptables, (void));
+INIT_KSYM_CALL(int, init_iptable_filter, (void));
+INIT_KSYM_CALL(int, init_iptable_mangle, (void));
+INIT_KSYM_CALL(int, init_iptable_limit, (void));
+INIT_KSYM_CALL(int, init_iptable_multiport, (void));
+INIT_KSYM_CALL(int, init_iptable_tos, (void));
+INIT_KSYM_CALL(int, init_iptable_TOS, (void));
+INIT_KSYM_CALL(int, init_iptable_REJECT, (void));
+INIT_KSYM_CALL(int, init_iptable_TCPMSS, (void));
+INIT_KSYM_CALL(int, init_iptable_tcpmss, (void));
+INIT_KSYM_CALL(int, init_iptable_ttl, (void));
+INIT_KSYM_CALL(int, init_iptable_LOG, (void));
+INIT_KSYM_CALL(int, init_iptable_length, (void));
+INIT_KSYM_CALL(int, init_iptable_conntrack, (void));
+INIT_KSYM_CALL(int, init_iptable_ftp, (void));
+INIT_KSYM_CALL(int, init_iptable_irc, (void));
+INIT_KSYM_CALL(int, init_iptable_conntrack_match, (void));
+INIT_KSYM_CALL(int, init_iptable_state, (void));
+INIT_KSYM_CALL(int, init_iptable_helper, (void));
+INIT_KSYM_CALL(int, init_iptable_nat, (void));
+INIT_KSYM_CALL(int, init_iptable_nat_ftp, (void));
+INIT_KSYM_CALL(int, init_iptable_nat_irc, (void));
+INIT_KSYM_CALL(int, init_iptable_REDIRECT, (void));
+INIT_KSYM_CALL(int, init_iptable_owner, (void));
+INIT_KSYM_CALL(void, fini_iptable_owner, (void));
+INIT_KSYM_CALL(void, fini_iptable_nat_irc, (void));
+INIT_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
+INIT_KSYM_CALL(void, fini_iptable_nat, (void));
+INIT_KSYM_CALL(void, fini_iptable_helper, (void));
+INIT_KSYM_CALL(void, fini_iptable_state, (void));
+INIT_KSYM_CALL(void, fini_iptable_conntrack_match, (void));
+INIT_KSYM_CALL(void, fini_iptable_irc, (void));
+INIT_KSYM_CALL(void, fini_iptable_ftp, (void));
+INIT_KSYM_CALL(void, fini_iptable_conntrack, (void));
+INIT_KSYM_CALL(void, fini_iptable_length, (void));
+INIT_KSYM_CALL(void, fini_iptable_LOG, (void));
+INIT_KSYM_CALL(void, fini_iptable_ttl, (void));
+INIT_KSYM_CALL(void, fini_iptable_tcpmss, (void));
+INIT_KSYM_CALL(void, fini_iptable_TCPMSS, (void));
+INIT_KSYM_CALL(void, fini_iptable_REJECT, (void));
+INIT_KSYM_CALL(void, fini_iptable_TOS, (void));
+INIT_KSYM_CALL(void, fini_iptable_tos, (void));
+INIT_KSYM_CALL(void, fini_iptable_multiport, (void));
+INIT_KSYM_CALL(void, fini_iptable_limit, (void));
+INIT_KSYM_CALL(void, fini_iptable_filter, (void));
+INIT_KSYM_CALL(void, fini_iptable_mangle, (void));
+INIT_KSYM_CALL(void, fini_iptables, (void));
+INIT_KSYM_CALL(void, fini_netfilter, (void));
+INIT_KSYM_CALL(void, fini_iptable_REDIRECT, (void));
+
+INIT_KSYM_CALL(void, ipt_flush_table, (struct ipt_table *table));
+#endif
+
+#if defined(CONFIG_VE_CALLS_MODULE) || defined(CONFIG_VE_CALLS)
+INIT_KSYM_MODULE(vzmon);
+INIT_KSYM_CALL(int, real_get_device_perms_ve,
+		(int dev_type, dev_t dev, int access_mode));
+INIT_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env));
+INIT_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
+INIT_KSYM_CALL(void, real_update_load_avg_ve, (void));
+
+int get_device_perms_ve(int dev_type, dev_t dev, int access_mode)
+{
+	return KSYMSAFECALL(int, vzmon, real_get_device_perms_ve,
+					(dev_type, dev, access_mode));
+}
+EXPORT_SYMBOL(get_device_perms_ve);
+
+void do_env_cleanup(struct ve_struct *env)
+{
+	KSYMSAFECALL_VOID(vzmon, real_do_env_cleanup, (env));
+}
+
+void do_env_free(struct ve_struct *env)
+{
+	KSYMSAFECALL_VOID(vzmon, real_do_env_free, (env));
+}
+EXPORT_SYMBOL(do_env_free);
+
+void do_update_load_avg_ve(void)
+{
+	KSYMSAFECALL_VOID(vzmon, real_update_load_avg_ve, ());
+}
+#endif
+
+#if defined(CONFIG_VE_ETHDEV) || defined(CONFIG_VE_ETHDEV_MODULE)
+INIT_KSYM_MODULE(vzethdev);
+INIT_KSYM_CALL(int, veth_open, (struct net_device *dev));
+#endif
+
+struct ve_struct ve0 = {
+	.utsname		= &system_utsname,
+	.vetask_lh		= LIST_HEAD_INIT(ve0.vetask_lh),
+	.start_jiffies		= INITIAL_JIFFIES,
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	._net_dev_tail		= &ve0._net_dev_base,
+	.ifindex		= -1,
+#endif
+	._simple_dev_list	= LIST_HEAD_INIT(ve0._simple_dev_list),
+	.is_running		= 1,
+};
+
+EXPORT_SYMBOL(ve0);
+
+#endif /* CONFIG_VE */
diff -Nurap linux-2.6.9-100.orig/kernel/vecalls.c linux-2.6.9-ve023stab054/kernel/vecalls.c
--- linux-2.6.9-100.orig/kernel/vecalls.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/vecalls.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,3538 @@
+/*
+ *  linux/kernel/vecalls.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ */
+
+/*
+ * 'vecalls.c' is file with basic VE support. It provides basic primities
+ * along with initialization script
+ */
+
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/capability.h>
+#include <linux/ve.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/ve_owner.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sys.h>
+#include <linux/fs.h>
+#include <linux/namespace.h>
+#include <linux/termios.h>
+#include <linux/tty_driver.h>
+#include <linux/netdevice.h>
+#include <linux/wait.h>
+#include <linux/inetdevice.h>
+#include <linux/utsname.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/suspend.h>
+#include <linux/rcupdate.h>
+#include <linux/in.h>
+#include <linux/major.h>
+#include <linux/kdev_t.h>
+#include <linux/idr.h>
+#include <linux/inetdevice.h>
+#include <net/pkt_sched.h>
+#include <linux/divert.h>
+#include <ub/beancounter.h>
+
+#include <net/route.h>
+#include <net/ip_fib.h>
+#include <net/arp.h>
+
+#include <linux/ve_proto.h>
+#include <linux/venet.h>
+#include <linux/vzctl.h>
+#include <linux/vzcalluser.h>
+#ifdef CONFIG_FAIRSCHED
+#include <linux/fairsched.h>
+#endif
+
+#include <linux/nfcalls.h>
+#include <linux/virtinfo.h>
+
+struct ve_struct *ve_list_head = NULL;
+int nr_ve = 1;	/* One VE always exists. Compatibility with vestat */
+rwlock_t ve_list_guard = RW_LOCK_UNLOCKED;
+static rwlock_t devperms_hash_guard = RW_LOCK_UNLOCKED;
+
+extern int glob_virt_pids;
+extern int glob_ve_meminfo;
+
+static int	do_env_enter(struct ve_struct *ve, unsigned int flags);
+int		real_env_create(envid_t veid, unsigned flags, u32 class_id,
+				env_create_param_t *data, int datalen);
+static void	do_clean_devperms(envid_t veid);
+static int	alloc_ve_tty_drivers(struct ve_struct* ve);
+static void	free_ve_tty_drivers(struct ve_struct* ve);
+static int	register_ve_tty_drivers(struct ve_struct* ve);
+static void	unregister_ve_tty_drivers(struct ve_struct* ve);
+static int	init_ve_tty_drivers(struct ve_struct *);
+static void	fini_ve_tty_drivers(struct ve_struct *);
+static void	clear_termios(struct tty_driver* driver );
+static void	ve_mapped_devs_cleanup(struct ve_struct *ve);
+
+static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf);
+
+static void vecalls_exit(void);
+
+struct ve_struct *__find_ve_by_id(envid_t veid)
+{
+	struct ve_struct *ve;
+	for (ve = ve_list_head;
+	     ve != NULL && ve->veid != veid;
+	     ve = ve->next);
+	return ve;
+}
+
+struct ve_struct *get_ve_by_id(envid_t veid)
+{
+	struct ve_struct *ve;
+	read_lock(&ve_list_guard);
+	ve = __find_ve_by_id(veid);
+	get_ve(ve);
+	read_unlock(&ve_list_guard);
+	return ve;
+}
+
+/*
+ * real_put_ve() MUST be used instead of put_ve() inside vecalls.
+ */
+void real_do_env_free(struct ve_struct *ve);
+static inline void real_put_ve(struct ve_struct *ve)
+{
+	if (ve && atomic_dec_and_test(&ve->counter)) {
+		if (atomic_read(&ve->pcounter) > 0)
+			BUG();
+		if (ve->is_running)
+			BUG();
+		real_do_env_free(ve);
+	}
+}
+
+extern struct file_system_type devpts_fs_type;
+extern struct file_system_type sysfs_fs_type;
+extern struct file_system_type tmpfs_fs_type;
+extern struct file_system_type proc_fs_type;
+
+extern spinlock_t task_capability_lock;
+extern void ve_ipc_free(struct ve_struct * ve);
+extern void ip_fragment_cleanup(struct ve_struct *ve);
+
+static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf)
+{
+	struct ve_struct *ve;
+	struct vz_cpu_stat *vstat;
+	int retval;
+	int i, cpu;
+	unsigned long tmp;
+
+	if (!ve_is_super(get_exec_env()) && (veid != get_exec_env()->veid))
+		return -EPERM;
+	if (veid == 0)
+		return -ESRCH;
+
+	vstat = kmalloc(sizeof(*vstat), GFP_KERNEL);
+	if (!vstat)
+		return -ENOMEM;
+	memset(vstat, 0, sizeof(*vstat));
+	
+	retval = -ESRCH;
+	read_lock(&ve_list_guard);
+	ve = __find_ve_by_id(veid);
+	if (ve == NULL)
+		goto out_unlock;
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		vstat->user_jif += VE_CPU_STATS(ve, cpu)->user;
+		vstat->nice_jif += VE_CPU_STATS(ve, cpu)->nice;
+		vstat->system_jif += VE_CPU_STATS(ve, cpu)->system;
+		vstat->idle_clk += ve_sched_get_idle_time(ve, cpu);
+	}
+	vstat->uptime_clk = get_cycles() - ve->start_cycles;
+	vstat->uptime_jif = jiffies - (unsigned long)ve->start_jiffies;
+	for (i = 0; i < 3; i++) {
+		tmp = ve->avenrun[i] + (FIXED_1/200);
+		vstat->avenrun[i].val_int = LOAD_INT(tmp);
+		vstat->avenrun[i].val_frac = LOAD_FRAC(tmp);
+	}
+	read_unlock(&ve_list_guard);
+
+	retval = 0;
+	if (copy_to_user(buf, vstat, sizeof(*vstat)))
+		retval = -EFAULT;
+out_free:
+	kfree(vstat);
+	return retval;
+
+out_unlock:
+	read_unlock(&ve_list_guard);
+	goto out_free;
+}
+
+/**********************************************************************
+ * Devices permissions routines,
+ * character and block devices separately
+ **********************************************************************/
+
+/* Rules applied in the following order:
+   MAJOR!=0, MINOR!=0
+   MAJOR!=0, MINOR==0
+   MAJOR==0, MINOR==0
+*/
+struct devperms_struct
+{
+	dev_t   	dev;	/* device id */
+	unsigned char	mask;
+	unsigned 	type;
+	envid_t	 	veid;
+
+	struct devperms_struct *devhash_next;
+	struct devperms_struct **devhash_pprev;
+};
+
+static struct devperms_struct original_perms[] =
+{{
+	MKDEV(0,0),	/*device*/
+	S_IROTH | S_IWOTH,
+	S_IFCHR,	/*type*/
+	0,		/*veid*/
+	NULL, NULL
+},
+{
+	MKDEV(0,0),	/*device*/
+	S_IXGRP | S_IROTH | S_IWOTH,
+	S_IFBLK,	/*type*/
+	0,		/*veid*/
+	NULL, NULL
+}};
+
+static struct devperms_struct default_major_perms[] = {
+	{MKDEV(UNIX98_PTY_MASTER_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
+	{MKDEV(UNIX98_PTY_SLAVE_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
+	{MKDEV(PTY_MASTER_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
+	{MKDEV(PTY_SLAVE_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
+};
+static struct devperms_struct default_minor_perms[] = {
+	{MKDEV(MEM_MAJOR, 3), S_IROTH | S_IWOTH, S_IFCHR},   /* null */
+	{MKDEV(MEM_MAJOR, 5), S_IROTH | S_IWOTH, S_IFCHR},   /* zero */
+	{MKDEV(MEM_MAJOR, 7), S_IROTH | S_IWOTH, S_IFCHR},   /* full */
+	{MKDEV(TTYAUX_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},/* tty */
+	{MKDEV(TTYAUX_MAJOR, 2), S_IROTH | S_IWOTH, S_IFCHR},/* ptmx */
+	{MKDEV(MEM_MAJOR, 8), S_IROTH, S_IFCHR},  /* random */
+	{MKDEV(MEM_MAJOR, 9), S_IROTH, S_IFCHR},  /* urandom */
+};
+
+static struct devperms_struct default_deny_perms = {
+	MKDEV(0, 0), 0, S_IFCHR
+};
+
+static inline struct devperms_struct *find_default_devperms(int type,
+						    dev_t dev)
+{
+	int i;
+
+	/* XXX all defaults perms are S_IFCHR */
+	if (type != S_IFCHR)
+		return &default_deny_perms;
+
+	for (i = 0; 
+	     i < sizeof(default_minor_perms)/sizeof(struct devperms_struct);
+	     i++)
+		if (MAJOR(dev) == MAJOR(default_minor_perms[i].dev) &&
+		    MINOR(dev) == MINOR(default_minor_perms[i].dev))
+			return &default_minor_perms[i];
+	for (i = 0; 
+	     i < sizeof(default_major_perms)/sizeof(struct devperms_struct);
+	     i++)
+		if (MAJOR(dev) == MAJOR(default_major_perms[i].dev))
+			return &default_major_perms[i];
+
+	return &default_deny_perms;
+}
+
+#define DEVPERMS_HASH_SZ 512
+struct devperms_struct *devperms_hash[DEVPERMS_HASH_SZ];
+
+/* Bucket index from (veid, dev); DEVPERMS_HASH_SZ must stay a power of 2
+ * for the mask below to be valid. */
+#define devperms_hashfn(id,dev) \
+	( (id << 5) ^ (id >> 5) ^ (MAJOR(dev)) ^ MINOR(dev) ) & \
+						(DEVPERMS_HASH_SZ - 1)
+
+/*
+ * Insert p at the head of its bucket.
+ * Caller must hold devperms_hash_guard for writing.
+ */
+static inline void hash_devperms(struct devperms_struct *p)
+{
+	struct devperms_struct **htable =
+		&devperms_hash[devperms_hashfn(p->veid,p->dev)];
+
+	if ((p->devhash_next = *htable) != NULL)
+		(*htable)->devhash_pprev = &p->devhash_next;
+	*htable = p;
+	p->devhash_pprev = htable;
+}
+
+/*
+ * Unlink p from its bucket (does not free it).
+ * Caller must hold devperms_hash_guard for writing.
+ */
+static inline void unhash_devperms(struct devperms_struct *p)
+{
+	if (p->devhash_next)
+		p->devhash_next->devhash_pprev = p->devhash_pprev;
+	*p->devhash_pprev = p->devhash_next;
+}
+
+/* Boot-time setup: empty the table and hash the two VE0 catch-all entries. */
+static int __init init_devperms_hash(void)
+{
+	write_lock_irq(&devperms_hash_guard);
+	memset(devperms_hash, 0, sizeof(devperms_hash));
+	hash_devperms(original_perms);
+	hash_devperms(original_perms+1);
+	write_unlock_irq(&devperms_hash_guard);
+	return 0;
+}
+
+/* Nothing to tear down: remaining entries are the static VE0 ones. */
+static inline void fini_devperms_hash(void)
+{
+}
+
+/*
+ * Look up the exact (veid, type, dev) entry; returns NULL if absent.
+ * Caller must hold devperms_hash_guard (read or write).
+ */
+static inline struct devperms_struct *find_devperms(envid_t veid,
+						    int type,
+						    dev_t dev)
+{
+	struct devperms_struct *p, **htable =
+		&devperms_hash[devperms_hashfn(veid,dev)];
+
+	for (p = *htable; p && !(p->type==type &&
+				 MAJOR(dev)==MAJOR(p->dev) &&
+				 MINOR(dev)==MINOR(p->dev) &&
+				 p->veid==veid);
+	     p = p->devhash_next)
+		;
+	return p;
+}
+
+
+/* Unhash and free every permission entry belonging to VE @veid
+ * (full table scan; called when the VE goes away). */
+static void do_clean_devperms(envid_t veid)
+{
+	int i;
+	struct devperms_struct* ve;
+
+	write_lock_irq(&devperms_hash_guard);
+	for (i = 0; i < DEVPERMS_HASH_SZ; i++)
+		for (ve = devperms_hash[i]; ve;) {
+			/* remember next: unhash_devperms() unlinks ve */
+			struct devperms_struct *next = ve->devhash_next;
+			if (ve->veid == veid) {
+				unhash_devperms(ve);
+				kfree(ve);
+			}
+
+			ve = next;
+		}
+	write_unlock_irq(&devperms_hash_guard);
+}
+
+/*
+ * Mode is a mask of
+ *	FMODE_READ	for read access (configurable by S_IROTH)
+ *	FMODE_WRITE	for write access (configurable by S_IWOTH)
+ *	FMODE_QUOTACTL	for quotactl access (configurable by S_IXGRP)
+ *
+ * Checks whether the current VE may access (dev_type, dev) with
+ * access_mode.  Returns 0 if allowed, -EACCES if denied.
+ */
+int real_get_device_perms_ve(int dev_type, dev_t dev, int access_mode)
+{
+	struct devperms_struct *perms;
+	struct ve_struct *ve;
+	envid_t veid;
+
+	perms = NULL;
+	ve = get_exec_env();
+	veid = ve->veid;
+
+	read_lock(&devperms_hash_guard);
+
+	/* Most specific rule first: exact minor, then major-only, then the
+	 * VE's any-device rule, finally the compiled-in defaults. */
+	perms = find_devperms(veid, dev_type|VE_USE_MINOR, dev);
+	if (perms)
+		goto end;
+
+	perms = find_devperms(veid, dev_type|VE_USE_MAJOR, MKDEV(MAJOR(dev),0));
+	if (perms)
+		goto end;
+
+	perms = find_devperms(veid, dev_type, MKDEV(0,0));
+	if (perms)
+		goto end;
+
+	perms = find_default_devperms(dev_type, dev);
+
+end:
+	read_unlock(&devperms_hash_guard);
+
+	/* Map the FMODE_READ/WRITE/QUOTACTL bit combination (index 0..7)
+	 * to the S_IROTH/S_IWOTH/S_IXGRP bits stored in perms->mask. */
+	access_mode = "\000\004\002\006\010\014\012\016"[access_mode];
+	/* NOTE(review): find_default_devperms() never returns NULL, so the
+	 * -ENODEV branch below appears unreachable -- confirm. */
+	return perms ?
+		(((perms->mask & access_mode) == access_mode) ? 0 : -EACCES) :
+		-ENODEV;
+}
+
+/*
+ * Set (or create) the permission mask for (veid, type, dev).
+ * @type carries VE_USE_MINOR/VE_USE_MAJOR flags selecting the match
+ * granularity; for major-only and any-device entries the dev value is
+ * canonicalized before hashing.  Returns 0 or -ENOMEM.
+ */
+int do_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask)
+{
+	struct devperms_struct   *perms;
+
+	write_lock_irq(&devperms_hash_guard);
+	perms = find_devperms(veid, type, dev);
+	if (!perms) {
+		struct devperms_struct   *perms_new;
+		/* drop the lock: GFP_KERNEL allocation may sleep */
+		write_unlock_irq(&devperms_hash_guard);
+
+		perms_new = kmalloc(sizeof(struct devperms_struct), GFP_KERNEL);
+		if (!perms_new)
+			return -ENOMEM;
+
+		write_lock_irq(&devperms_hash_guard);
+		/* re-check: somebody may have inserted the entry meanwhile */
+		perms = find_devperms(veid, type, dev);
+		if (perms) {
+			kfree(perms_new);
+			perms_new = perms;
+		}
+		/*
+		 * NOTE(review): in the race case above perms_new now points
+		 * at an entry that is already hashed, yet hash_devperms()
+		 * below inserts it a second time -- this looks like it can
+		 * corrupt the hash chain; confirm intended behaviour.
+		 */
+
+		switch (type & VE_USE_MASK) {
+		case 0:
+			dev = 0;
+			break;
+		case VE_USE_MAJOR:
+			dev = MKDEV(MAJOR(dev),0);
+			break;
+		}
+
+		perms_new->veid = veid;
+		perms_new->dev = dev;
+		perms_new->type = type;
+		perms_new->mask = mask & S_IALLUGO;
+		hash_devperms(perms_new);
+	} else
+		perms->mask = mask & S_IALLUGO;
+	write_unlock_irq(&devperms_hash_guard);
+	return 0;
+}
+EXPORT_SYMBOL(do_setdevperms);
+
+/*
+ * Syscall-level wrapper around do_setdevperms(): requires CAP_SETVEID,
+ * a non-zero target veid, and a running VE.
+ * Returns 0, -EPERM, -ESRCH or -ENOMEM.
+ */
+int real_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask)
+{
+	struct ve_struct *ve;
+	int err;
+
+	if (!capable(CAP_SETVEID) || veid == 0)
+		return -EPERM;
+
+	if ((ve = get_ve_by_id(veid)) == NULL)
+		return -ESRCH;
+
+	/* op_sem keeps the VE from stopping while we modify its perms */
+	down_read(&ve->op_sem);
+	err = -ESRCH;
+	if (ve->is_running)
+		err = do_setdevperms(veid, type, dev, mask);
+	up_read(&ve->op_sem);
+	real_put_ve(ve);
+	return err;
+}
+
+/* Recompute the per-VE 1/5/15-minute load averages over all VEs,
+ * mirroring the global avenrun calculation. */
+void real_update_load_avg_ve(void)
+{
+	struct ve_struct *ve;
+	unsigned long nr_active;
+
+	read_lock(&ve_list_guard);
+	for (ve = ve_list_head; ve != NULL; ve = ve->next) {
+		nr_active = nr_running_ve(ve) + nr_uninterruptible_ve(ve);
+		nr_active *= FIXED_1;	/* fixed-point, as in CALC_LOAD */
+		CALC_LOAD(ve->avenrun[0], EXP_1, nr_active);
+		CALC_LOAD(ve->avenrun[1], EXP_5, nr_active);
+		CALC_LOAD(ve->avenrun[2], EXP_15, nr_active);
+	}
+	read_unlock(&ve_list_guard);
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * FS-related helpers to VE start/stop
+ *
+ **********************************************************************
+ **********************************************************************/
+
+/*
+ * DEVPTS needs a virtualization: each environment should see each own list of
+ * pseudo-terminals.
+ * To implement it we need to have separate devpts superblocks for each
+ * VE, and each VE should mount its own one.
+ * Thus, separate vfsmount structures are required.
+ * To minimize intrusion into vfsmount lookup code, separate file_system_type
+ * structures are created.
+ *
+ * In addition to this, a patch for the character device itself is required,
+ * system itself is used only for MINOR/MAJOR lookup.
+ */
+/*
+ * Clone @template into a per-VE file_system_type, register it and
+ * kern_mount() it.  On success *p_fs_type/*p_mnt receive the new
+ * objects; on failure both are left untouched and an errno is returned.
+ */
+static int register_ve_fs_type(struct ve_struct *ve,
+		struct file_system_type *template,
+		struct file_system_type **p_fs_type, struct vfsmount **p_mnt)
+{
+	struct vfsmount *mnt;
+	struct file_system_type *local_fs_type;
+	int ret;
+
+	VZTRACE("register_ve_fs_type(\"%s\")\n", template->name);
+
+	/* extra sizeof(void *) holds the owner-VE pointer appended below */
+	local_fs_type = kmalloc(sizeof(*local_fs_type) + sizeof(void *),
+					GFP_KERNEL);
+	if (local_fs_type == NULL)
+		return -ENOMEM;
+
+	memset(local_fs_type, 0, sizeof(*local_fs_type));
+	local_fs_type->name = template->name;
+	local_fs_type->fs_flags = template->fs_flags;
+	local_fs_type->get_sb = template->get_sb;
+	local_fs_type->kill_sb = template->kill_sb;
+	local_fs_type->owner = template->owner;
+	/*
+	 * 1. we do not have refcounter on fstype
+	 * 2. fstype holds reference to ve using get_ve()/put_ve().
+	 * so we free fstype when freeing ve and we are sure it's ok to free it
+	 */
+	SET_VE_OWNER_FSTYPE(local_fs_type, ve);
+	get_filesystem(local_fs_type);	/* get_ve() inside */
+
+	ret = register_filesystem(local_fs_type); /* does not get */
+	if (ret)
+		goto reg_err;
+
+	mnt = kern_mount(local_fs_type);
+	if (IS_ERR(mnt))
+		goto mnt_err;
+
+	/* Usage counters after successful execution kern_mount:
+	 * local_fs_type - +1 (get_fs_type,get_sb_single,put_filesystem)
+	 * mnt - +1 == 1 (alloc_vfsmnt)
+	 */
+
+	*p_fs_type = local_fs_type;
+	*p_mnt = mnt;
+	return 0;
+
+mnt_err:
+	ret = PTR_ERR(mnt);
+	unregister_filesystem(local_fs_type); /* does not put */
+
+reg_err:
+	put_filesystem(local_fs_type);
+	kfree(local_fs_type);
+	printk(KERN_DEBUG
+	       "register_ve_fs_type(\"%s\") err=%d\n", template->name, ret);
+	return ret;
+}
+
+/*
+ * Unmount every mount of @local_fs_type present in the current task's
+ * namespace: collect matching vfsmounts onto a private list, then
+ * umount_tree() each of them.
+ */
+static void umount_ve_fs_type(struct file_system_type *local_fs_type)
+{
+	struct vfsmount *mnt;
+	struct list_head *p, *q;
+	LIST_HEAD(kill);
+
+	down_write(&current->namespace->sem);
+	spin_lock(&vfsmount_lock);
+	list_for_each_safe(p, q, &current->namespace->list) {
+		mnt = list_entry(p, struct vfsmount, mnt_list);
+		if (mnt->mnt_sb->s_type != local_fs_type)
+			continue;
+		list_del(p);
+		list_add(p, &kill);
+	}
+
+	while (!list_empty(&kill)) {
+		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
+		umount_tree(mnt);	/* removes mnt from the kill list */
+	}
+	spin_unlock(&vfsmount_lock);
+	up_write(&current->namespace->sem);
+}
+
+/*
+ * Undo register_ve_fs_type(): unregister the fstype, unmount all of its
+ * mounts and drop our references.  No-op only when BOTH arguments are
+ * NULL (a never-initialized pair); the struct itself is freed later by
+ * free_ve_filesystems()/free_ve_proc().
+ */
+static void unregister_ve_fs_type(struct file_system_type *local_fs_type,
+		struct vfsmount *local_fs_mount)
+{
+	if (local_fs_mount == NULL && local_fs_type == NULL)
+		return;
+
+	VZTRACE("unregister_ve_fs_type(\"%s\")\n", local_fs_type->name);
+
+	unregister_filesystem(local_fs_type);
+	umount_ve_fs_type(local_fs_type);
+	if (local_fs_mount)
+		kern_umount(local_fs_mount); /* alias to mntput, drop our ref */
+	put_filesystem(local_fs_type);
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * Per-VE sysctl virtualization
+ *
+ **********************************************************************
+ **********************************************************************/
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Template for the per-VE sysctl tree, cloned for each VE by
+ * register_ve_sysctltables().  Entry [0] is the "kernel" directory whose
+ * .child points at entry [2]; entry [1] is the terminator of the root
+ * level.  The .data pointers of entries [2]..[10] are filled in per VE
+ * after cloning (see register_ve_sysctltables()).
+ */
+static ctl_table ve_sysctl_tables[] = {
+	/* kernel */
+	{
+		.ctl_name	= CTL_KERN,
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= &ve_sysctl_tables[2],
+	},
+	{ .ctl_name = 0 },
+	/* kernel/[vars] */
+	{
+		.ctl_name	= KERN_NODENAME,
+		.procname	= "hostname",
+		.maxlen 	= 64,
+		.mode		= 0644,
+		.proc_handler	= &proc_doutsstring,
+		.strategy	= &sysctl_string,
+	},
+	{
+		.ctl_name	= KERN_DOMAINNAME,
+		.procname	= "domainname",
+		.maxlen		= 64,
+		.mode		= 0644,
+		.proc_handler	= &proc_doutsstring,
+		.strategy	= &sysctl_string,
+	},
+	{
+		.ctl_name	= KERN_SHMMAX,
+		.procname	= "shmmax",
+		.maxlen		= sizeof(size_t),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_minmax,
+	},
+	{
+		.ctl_name	= KERN_SHMALL,
+		.procname	= "shmall",
+		.maxlen		= sizeof(size_t),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_minmax,
+	},
+	{
+		.ctl_name	= KERN_SHMMNI,
+		.procname	= "shmmni",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_MSGMAX,
+		.procname	= "msgmax",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_MSGMNI,
+		.procname	= "msgmni",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_MSGMNB,
+		.procname	= "msgmnb",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_SEM,
+		.procname	= "sem",
+		.maxlen		= 4 * sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ .ctl_name = 0, }
+};
+
+/*
+ * Clone ve_sysctl_tables for @ve, point each leaf's .data at the VE's
+ * own variables (indices below match template entries [2]..[10]) and
+ * register the tree.  Returns 0 or -ENOMEM.
+ */
+static int register_ve_sysctltables(struct ve_struct *ve)
+{
+	struct ctl_table_header *header;
+	ctl_table *root, *table;
+	
+	VZTRACE("register_ve_sysctltables\n");
+
+	root = clone_sysctl_template(ve_sysctl_tables,
+			sizeof(ve_sysctl_tables) / sizeof(ctl_table));
+	if (root == NULL)
+		goto out;
+
+	table = root->child;
+	table[0].data = &ve->utsname->nodename;
+	table[1].data = &ve->utsname->domainname;
+	table[2].data = &ve->_shm_ctlmax;
+	table[3].data = &ve->_shm_ctlall;
+	table[4].data = &ve->_shm_ctlmni;
+	table[5].data = &ve->_msg_ctlmax;
+	table[6].data = &ve->_msg_ctlmni;
+	table[7].data = &ve->_msg_ctlmnb;
+	table[8].data = &ve->_sem_ctls[0];
+
+	/* insert at head to override kern entries */
+	header = register_sysctl_table(root, 1);
+	if (header == NULL)
+		goto out_free;
+
+	ve->kern_header = header;
+	ve->kern_table = root;
+	return 0;
+
+out_free:
+	free_sysctl_clone(root);
+out:
+	return -ENOMEM;
+}
+
+/* Unregister the VE's cloned sysctl tree (does not free the clone). */
+static inline void unregister_ve_sysctltables(struct ve_struct *ve)
+{
+	unregister_sysctl_table(ve->kern_header);
+}
+
+/* Free the cloned sysctl tree; call after unregister_ve_sysctltables(). */
+static inline void free_ve_sysctltables(struct ve_struct *ve)
+{
+	free_sysctl_clone(ve->kern_table);
+}
+#endif
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE start: subsystems
+ *
+ **********************************************************************
+ **********************************************************************/
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+
+extern struct new_utsname virt_utsname;
+
+/*
+ * Give @ve a private utsname: a copy of the host's system_utsname with
+ * the release string replaced by the virtualized one (virt_utsname).
+ * Returns 0 or -ENOMEM.
+ */
+static int init_ve_utsname(struct ve_struct *ve)
+{
+	ve->utsname = kmalloc(sizeof(*ve->utsname), GFP_KERNEL);
+	if (ve->utsname == NULL)
+		return -ENOMEM;
+
+	down_read(&uts_sem); /* protect the source */
+	memcpy(ve->utsname, &system_utsname, sizeof(*ve->utsname));
+	memcpy(ve->utsname->release, virt_utsname.release,
+			sizeof(virt_utsname.release));
+	up_read(&uts_sem);
+
+	return 0;
+}
+
+/* Release the VE's private utsname. */
+static void free_ve_utsname(struct ve_struct *ve)
+{
+	kfree(ve->utsname);
+	ve->utsname = NULL;
+}
+
+/*
+ * Allocate (fini == 0) or free (fini != 0) all per-VE SNMP MIB percpu
+ * counters.  The single label chain serves double duty: a failed
+ * allocation jumps into the middle of it to unwind only what was
+ * allocated, while "goto fini" enters at the top to free everything.
+ */
+static int init_fini_ve_mibs(struct ve_struct *ve, int fini)
+{
+	if (fini)
+		goto fini;
+	if (!(ve->_net_statistics[0] = alloc_percpu(struct linux_mib)))
+		goto out1;
+	if (!(ve->_net_statistics[1] = alloc_percpu(struct linux_mib)))
+		goto out2;
+	if (!(ve->_ip_statistics[0] = alloc_percpu(struct ipstats_mib)))
+		goto out3;
+	if (!(ve->_ip_statistics[1] = alloc_percpu(struct ipstats_mib)))
+		goto out4;
+	if (!(ve->_icmp_statistics[0] = alloc_percpu(struct icmp_mib)))
+		goto out5;
+	if (!(ve->_icmp_statistics[1] = alloc_percpu(struct icmp_mib)))
+		goto out6;
+	if (!(ve->_tcp_statistics[0] = alloc_percpu(struct tcp_mib)))
+		goto out7;
+	if (!(ve->_tcp_statistics[1] = alloc_percpu(struct tcp_mib)))
+		goto out8;
+	if (!(ve->_udp_statistics[0] = alloc_percpu(struct udp_mib)))
+		goto out9;
+	if (!(ve->_udp_statistics[1] = alloc_percpu(struct udp_mib)))
+		goto out10;
+	return 0;
+fini:
+	free_percpu(ve->_udp_statistics[1]);
+out10:
+	free_percpu(ve->_udp_statistics[0]);
+out9:
+	free_percpu(ve->_tcp_statistics[1]);
+out8:
+	free_percpu(ve->_tcp_statistics[0]);
+out7:
+	free_percpu(ve->_icmp_statistics[1]);
+out6:
+	free_percpu(ve->_icmp_statistics[0]);
+out5:
+	free_percpu(ve->_ip_statistics[1]);
+out4:
+	free_percpu(ve->_ip_statistics[0]);
+out3:
+	free_percpu(ve->_net_statistics[1]);
+out2:
+	free_percpu(ve->_net_statistics[0]);
+out1:
+	/* on the fini path this -ENOMEM is returned but ignored by callers */
+	return -ENOMEM;
+}
+
+/* Allocate the per-VE MIB counters. */
+static inline int init_ve_mibs(struct ve_struct *ve)
+{
+	return init_fini_ve_mibs(ve, 0);
+}
+
+/* Free the per-VE MIB counters. */
+static inline void fini_ve_mibs(struct ve_struct *ve)
+{
+	(void)init_fini_ve_mibs(ve, 1);
+}
+
+extern struct net_device templ_loopback_dev;
+/* alloc_netdev() setup callback: copy the loopback template wholesale,
+ * preserving only the padded value alloc_netdev computed for this dev. */
+static void veloop_setup(struct net_device *dev)
+{
+	int padded;
+	padded = dev->padded;
+	memcpy(dev, &templ_loopback_dev, sizeof(struct net_device));
+	dev->padded = padded;
+}
+
+/*
+ * Create the per-VE network-device state of the current exec env:
+ * an empty device list and a private loopback device cloned from the
+ * template, registered in this VE.  Returns 0 or a negative errno.
+ */
+static int init_ve_netdev(void)
+{
+	struct ve_struct *ve;
+	struct net_device_stats *stats;
+	int err;
+
+	ve = get_exec_env();
+	INIT_HLIST_HEAD(&ve->_net_dev_head);
+	ve->_net_dev_base = NULL;
+	ve->_net_dev_tail = &ve->_net_dev_base;
+
+	err = -ENOMEM;
+	ve->_loopback_dev = alloc_netdev(0, templ_loopback_dev.name, 
+					 veloop_setup);
+	if (ve->_loopback_dev == NULL)
+		goto out;
+
+	ve->_loopback_stats = kmalloc(sizeof(struct net_device_stats)*NR_CPUS,
+								GFP_KERNEL);
+	if (ve->_loopback_stats == NULL)
+		goto out_free_netdev;
+	if (loopback_dev.get_stats != NULL) {
+		stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
+		/* best effort: on failure the device simply has no
+		 * get_stats, not an init error */
+		if (stats != NULL) {
+			memset(stats, 0, sizeof(struct net_device_stats));
+			ve->_loopback_dev->priv = stats;
+			ve->_loopback_dev->get_stats = loopback_dev.get_stats;
+			ve->_loopback_dev->destructor = loopback_dev.destructor;
+		}
+	}
+	err = register_netdev(ve->_loopback_dev);
+	if (err)
+		goto out_free_stats;
+	return 0;
+
+out_free_stats:
+	if (ve->_loopback_dev->priv != NULL)
+		kfree(ve->_loopback_dev->priv);
+	kfree(ve->_loopback_stats);
+out_free_netdev:
+	free_netdev(ve->_loopback_dev);
+out:
+	return err;
+}
+
+/*
+ * Unregister and free every network device of the current exec env.
+ * The loop exits (break) while still holding rtnl_lock, which then
+ * covers the final loopback unregistration.
+ */
+static void fini_ve_netdev(void)
+{
+	struct ve_struct *ve;
+	struct net_device *dev;
+
+	ve = get_exec_env();
+	while (1) {
+		rtnl_lock();
+		/*
+		 * The loopback device is special: it can still be referenced
+		 * from FIB entries, so it must be freed last.  Freeing it
+		 * last is sufficient to guarantee absence of such references.
+		 */
+		if (dev_base == ve->_loopback_dev)
+			dev = dev_base->next;
+		else
+			dev = dev_base;
+		if (dev == NULL)
+			break;
+		unregister_netdevice(dev);
+		rtnl_unlock();
+		free_netdev(dev);
+	}
+	unregister_netdevice(ve->_loopback_dev);
+	rtnl_unlock();
+	free_netdev(ve->_loopback_dev);
+	ve->_loopback_dev = NULL;
+
+	kfree(ve->_loopback_stats);
+	ve->_loopback_stats = NULL;
+}
+#else
+#define init_ve_mibs(ve)	(0)
+#define fini_ve_mibs(ve)	do { } while (0)
+#define init_ve_netdev()	(0)
+#define fini_ve_netdev()	do { } while (0)
+#endif
+
+/*
+ * Allocate the VE's private /proc root proc_dir_entry.  The name string
+ * "/proc" is stored inline right after the struct (hence the +6 in the
+ * allocation: 5 chars + NUL).  Returns 0 or -ENOMEM.
+ */
+static int prepare_proc_root(struct ve_struct *ve)
+{
+	struct proc_dir_entry *de;
+
+	de = kmalloc(sizeof(struct proc_dir_entry) + 6, GFP_KERNEL);
+	if (de == NULL)
+		return -ENOMEM;
+	memset(de, 0, sizeof(struct proc_dir_entry));
+	memcpy(de + 1, "/proc", 6);
+	de->name = (char *)(de + 1);
+	de->namelen = 5;
+	de->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	de->nlink = 2;
+	atomic_set(&de->count, 1);
+
+	ve->proc_root = de;
+	return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Set up the VE's private procfs: a private proc root, a per-VE procfs
+ * fstype + mount, and the /proc/vz (and, with CONFIG_VE_IPTABLES,
+ * /proc/net) directories inside it.  Returns 0 or a negative errno.
+ */
+static int init_ve_proc(struct ve_struct *ve)
+{
+	int err;
+	struct proc_dir_entry *de;
+
+	err = prepare_proc_root(ve);
+	if (err)
+		goto out_root;
+
+	err = register_ve_fs_type(ve, &proc_fs_type,
+			&ve->proc_fstype, &ve->proc_mnt);
+	if (err)
+		goto out_reg;
+
+	/* create /proc/vz in VE local proc tree */
+	err = -ENOMEM;
+	de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+	if (!de)
+		goto out_vz;
+
+#ifdef CONFIG_VE_IPTABLES
+	de = proc_mkdir("net", NULL);
+	if (!de)
+		goto out_net;
+#endif
+	return 0;
+
+#ifdef CONFIG_VE_IPTABLES
+out_net:
+	remove_proc_entry("vz", NULL);
+#endif
+out_vz:
+	unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
+	ve->proc_mnt = NULL;
+out_reg:
+	/* proc_fstype and proc_root are freed in real_put_ve -> free_ve_proc */
+	;
+out_root:
+	return err;
+}
+
+/* Tear down the VE's procfs entries and mount (reverse of init_ve_proc). */
+static void fini_ve_proc(struct ve_struct *ve)
+{
+#ifdef CONFIG_VE_IPTABLES
+	remove_proc_entry("net", NULL);
+#endif
+	remove_proc_entry("vz", NULL);
+	unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
+	ve->proc_mnt = NULL;
+}
+
+/* Final free of the VE's proc root and fstype, warning about leftovers. */
+static void free_ve_proc(struct ve_struct *ve)
+{
+	/* proc filesystem frees proc_dir_entries on remove_proc_entry() only,
+	   so we check that everything was removed and not lost */
+	if (ve->proc_root && ve->proc_root->subdir) {
+		struct proc_dir_entry *p = ve->proc_root;
+		printk(KERN_WARNING "VPS: %d: proc entry /proc", ve->veid);
+		while ((p = p->subdir) != NULL)
+			printk("/%s", p->name);
+		printk(" is not removed!\n");
+	}
+
+	kfree(ve->proc_root);
+	kfree(ve->proc_fstype);
+
+	ve->proc_fstype = NULL;
+	ve->proc_root = NULL;
+}
+#else
+#define init_ve_proc(ve)	(0)
+#define fini_ve_proc(ve)	do { } while (0)
+#define free_ve_proc(ve)	do { } while (0)
+#endif
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Set up the VE's sysctl view: its /proc/sys directory, the cloned
+ * kernel.* table and the devinet (net.ipv4.conf.*) tables.
+ * Returns 0 or a negative errno.
+ */
+static int init_ve_sysctl(struct ve_struct *ve)
+{
+	int err;
+
+#ifdef CONFIG_PROC_FS
+	err = -ENOMEM;
+	ve->proc_sys_root = proc_mkdir("sys", 0);
+	if (ve->proc_sys_root == NULL)
+		goto out_proc;
+#endif
+	INIT_LIST_HEAD(&ve->sysctl_lh);
+	err = register_ve_sysctltables(ve);
+	if (err)
+		goto out_reg;
+
+	err = devinet_sysctl_init(ve);
+	if (err)
+		goto out_dev;
+
+	return 0;
+
+out_dev:
+	unregister_ve_sysctltables(ve);
+	free_ve_sysctltables(ve);
+out_reg:
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("sys", NULL);
+out_proc:
+#endif
+	return err;
+}
+
+/* Unregister the VE's sysctl tables and remove /proc/sys. */
+static void fini_ve_sysctl(struct ve_struct *ve)
+{
+	devinet_sysctl_fini(ve);
+	unregister_ve_sysctltables(ve);
+	remove_proc_entry("sys", NULL);
+}
+
+/* Final free of the sysctl clones; call after fini_ve_sysctl(). */
+static void free_ve_sysctl(struct ve_struct *ve)
+{
+	devinet_sysctl_free(ve);
+	free_ve_sysctltables(ve);
+}
+#else
+#define init_ve_sysctl(ve)	(0)
+#define fini_ve_sysctl(ve)	do { } while (0)
+#define free_ve_sysctl(ve)	do { } while (0)
+#endif
+
+#ifdef CONFIG_UNIX98_PTYS
+#include <linux/devpts_fs.h>
+
+/*
+ * Give the VE its own devpts instance: a private devpts_config
+ * (mode 0600) plus a per-VE fstype and mount.
+ * Returns 0 or a negative errno.
+ */
+static int init_ve_devpts(struct ve_struct *ve)
+{
+	int err;
+
+	err = -ENOMEM;
+	ve->devpts_config = kmalloc(sizeof(struct devpts_config), GFP_KERNEL);
+	if (ve->devpts_config == NULL)
+		goto out;
+	memset(ve->devpts_config, 0, sizeof(struct devpts_config));
+	ve->devpts_config->mode = 0600;
+	err = register_ve_fs_type(ve, &devpts_fs_type,
+			&ve->devpts_fstype, &ve->devpts_mnt);
+	if (err) {
+		kfree(ve->devpts_config);
+		ve->devpts_config = NULL;
+	}
+out:
+	return err;
+}
+
+/* Tear down the VE's devpts mount and config. */
+static void fini_ve_devpts(struct ve_struct *ve)
+{
+	unregister_ve_fs_type(ve->devpts_fstype, ve->devpts_mnt);
+	/* devpts_fstype is freed in real_put_ve -> free_ve_filesystems */
+	ve->devpts_mnt = NULL;
+	kfree(ve->devpts_config);
+	ve->devpts_config = NULL;
+}
+#else
+#define init_ve_devpts(ve)	(0)
+#define fini_ve_devpts(ve)	do { } while (0)
+#endif
+
+/* Register and mount a per-VE tmpfs instance (backs SysV shm). */
+static int init_ve_shmem(struct ve_struct *ve)
+{
+	return register_ve_fs_type(ve,
+				   &tmpfs_fs_type,
+				   &ve->shmem_fstype,
+				   &ve->shmem_mnt);
+}
+
+/* Tear down the VE's tmpfs mount. */
+static void fini_ve_shmem(struct ve_struct *ve)
+{
+	unregister_ve_fs_type(ve->shmem_fstype, ve->shmem_mnt);
+	/* shmem_fstype is freed in real_put_ve -> free_ve_filesystems */
+	ve->shmem_mnt = NULL;
+}
+
+/* Allocate and initialize an empty sysfs root dirent for the VE.
+ * Returns 0 or -ENOMEM. */
+static inline int init_ve_sysfs_root(struct ve_struct *ve)
+{
+	struct sysfs_dirent *sysfs_root;
+
+	sysfs_root = kmalloc(sizeof(struct sysfs_dirent), GFP_KERNEL);
+	if (sysfs_root == NULL)
+		return -ENOMEM;
+
+	memset(sysfs_root, 0, sizeof(struct sysfs_dirent));
+	INIT_LIST_HEAD(&sysfs_root->s_sibling);
+	INIT_LIST_HEAD(&sysfs_root->s_children);
+	sysfs_root->s_type = SYSFS_ROOT;
+	ve->sysfs_root = sysfs_root;
+	return 0;
+}
+
+/*
+ * Set up the VE's sysfs world: optionally (VE_FEATURE_SYSFS) a private
+ * sysfs root + mount, then per-VE clones of the class_obj and class
+ * subsystems, the net class and the tty class.
+ * Returns 0 or a negative errno, unwinding in reverse order on failure.
+ */
+static int init_ve_sysfs(struct ve_struct *ve)
+{
+	struct subsystem *subsys;
+	struct class *nc;
+	int err;
+	extern struct subsystem class_obj_subsys;
+	extern struct subsystem class_subsys;
+	extern struct class net_class;
+
+#ifdef CONFIG_SYSFS
+	err = 0;
+	if (ve->features & VE_FEATURE_SYSFS) {
+		err = init_ve_sysfs_root(ve);
+		if (err != 0)
+			goto out;
+		err = register_ve_fs_type(ve,
+				   &sysfs_fs_type,
+				   &ve->sysfs_fstype,
+				   &ve->sysfs_mnt);
+	}
+	if (err != 0)
+		goto out_fs_type;
+#endif
+	err = -ENOMEM;
+	subsys = kmalloc(sizeof(*subsys), GFP_KERNEL);
+	if (subsys == NULL)
+		goto out_class_obj;
+	/* ick, this is ugly, the things we go through to keep from showing up
+	 * in sysfs... */
+	memset(subsys, 0, sizeof(*subsys));
+	memcpy(&subsys->kset.kobj.name, &class_obj_subsys.kset.kobj.name,
+			sizeof(subsys->kset.kobj.name));
+	subsys->kset.ktype = class_obj_subsys.kset.ktype;
+	subsys->kset.hotplug_ops = class_obj_subsys.kset.hotplug_ops;
+	/* only init, not register: keeps this subsystem out of sysfs */
+	subsystem_init(subsys);
+	if (!subsys->kset.subsys)
+			subsys->kset.subsys = subsys;
+	ve->class_obj_subsys = subsys;
+
+	err = -ENOMEM;
+	subsys = kmalloc(sizeof(*subsys), GFP_KERNEL);
+	if (subsys == NULL)
+		goto out_class_subsys;
+	/* ick, this is ugly, the things we go through to keep from showing up
+	 * in sysfs... */
+	memset(subsys, 0, sizeof(*subsys));
+	memcpy(&subsys->kset.kobj.name, &class_subsys.kset.kobj.name,
+			sizeof(subsys->kset.kobj.name));
+	subsys->kset.ktype = class_subsys.kset.ktype;
+	subsys->kset.hotplug_ops = class_subsys.kset.hotplug_ops;
+	ve->class_subsys = subsys;
+	err = subsystem_register(subsys);
+	if (err != 0)
+		goto out_register;
+
+	/* per-VE clone of the global net class */
+	err = -ENOMEM;
+	nc = kmalloc(sizeof(*nc), GFP_KERNEL);
+	if (nc == NULL)
+		goto out_nc;
+	memset(nc, 0, sizeof(*nc));
+	nc->name = net_class.name;
+	nc->release = net_class.release;
+	nc->hotplug = net_class.hotplug;
+	err = class_register(nc);
+	if (err != 0)
+		goto out_class_register;
+	ve->net_class = nc;
+
+	ve->tty_class = init_ve_tty_class();
+	if (IS_ERR(ve->tty_class)) {
+		err = PTR_ERR(ve->tty_class);
+		ve->tty_class = NULL;
+		goto out_tty_class_register;
+	}
+
+	return err;
+
+out_tty_class_register:
+	class_unregister(ve->net_class);
+	ve->net_class = NULL;
+out_class_register:
+	kfree(nc);
+out_nc:
+	subsystem_unregister(subsys);
+out_register:
+	kfree(ve->class_subsys);
+	ve->class_subsys = NULL;
+out_class_subsys:
+	kfree(ve->class_obj_subsys);
+	ve->class_obj_subsys = NULL;
+out_class_obj:
+#ifdef CONFIG_SYSFS
+	unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
+	/* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
+out_fs_type:
+	kfree(ve->sysfs_root);
+	ve->sysfs_root = NULL;
+out:
+#endif
+	return err;
+}
+
+/* Tear down everything init_ve_sysfs() created, in reverse order. */
+static void fini_ve_sysfs(struct ve_struct *ve)
+{
+	fini_ve_tty_class(ve->tty_class);
+	class_unregister(ve->net_class);
+	subsystem_unregister(ve->class_subsys);
+
+	kfree(ve->net_class);
+	kfree(ve->class_subsys);
+	kfree(ve->class_obj_subsys);
+
+	ve->net_class = NULL;
+	ve->class_subsys = NULL;
+	ve->class_obj_subsys = NULL;
+#ifdef CONFIG_SYSFS
+	unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
+	ve->sysfs_mnt = NULL;
+	kfree(ve->sysfs_root);
+	ve->sysfs_root = NULL;
+	/* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
+#endif
+}
+
+/* Final free of the per-VE file_system_type clones (deferred from the
+ * fini_* helpers above -- see comments there). */
+static void free_ve_filesystems(struct ve_struct *ve)
+{
+#ifdef CONFIG_SYSFS
+	kfree(ve->sysfs_fstype);
+	ve->sysfs_fstype = NULL;
+#endif
+	kfree(ve->shmem_fstype);
+	ve->shmem_fstype = NULL;
+
+	kfree(ve->devpts_fstype);
+	ve->devpts_fstype = NULL;
+
+	free_ve_proc(ve);
+}
+
+/*
+ * Allocate the VE's private printk log bookkeeping (wait queue and
+ * log-buffer indices) in a single allocation; ve->_log_wait is the
+ * block's base pointer, which fini_printk() frees.
+ * Returns 0 or -ENOMEM.
+ */
+static int init_printk(struct ve_struct *ve)
+{
+	struct ve_prep_printk {
+		wait_queue_head_t       log_wait;
+		unsigned long           log_start;
+		unsigned long           log_end;
+		unsigned long           logged_chars;
+	} *tmp;
+
+	tmp = kmalloc(sizeof(struct ve_prep_printk), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+	memset(tmp, 0, sizeof(struct ve_prep_printk));
+	init_waitqueue_head(&tmp->log_wait);
+	ve->_log_wait = &tmp->log_wait;
+	ve->_log_start = &tmp->log_start;
+	ve->_log_end = &tmp->log_end;
+	ve->_logged_chars = &tmp->logged_chars;
+	/* ve->log_buf will be initialized later by ve_log_init() */
+	return 0;
+}
+
+/* Free the VE's log buffer and the bookkeeping block from init_printk(). */
+static void fini_printk(struct ve_struct *ve)
+{
+	/*
+	 * there is no spinlock protection here because nobody can use
+	 * log_buf at the moments when this code is called.
+	 */
+	kfree(ve->log_buf);
+	kfree(ve->_log_wait);	/* base of the ve_prep_printk allocation */
+}
+
+/* Kill the VE's remaining TCP sockets and clean up mapped devices.
+ * NOTE(review): both #if guards below are identical and could be merged. */
+static void fini_venet(struct ve_struct *ve)
+{
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	tcp_v4_kill_ve_sockets(ve);
+#endif
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	ve_mapped_devs_cleanup(ve);
+#endif
+}
+
+/*
+ * Create the VE's fairsched node (named after its veid), set its vcpu
+ * count and move the current task into it, then attach the VE to the
+ * scheduler.  Without CONFIG_FAIRSCHED only ve_sched_attach() runs.
+ * Returns 0 or a negative errno.
+ */
+static int init_ve_sched(struct ve_struct *ve, unsigned int vcpus)
+{
+#ifdef CONFIG_FAIRSCHED
+	int err;
+
+	/*
+	 * We refuse to switch to an already existing node since nodes
+	 * keep a pointer to their ve_struct...
+	 */
+	err = sys_fairsched_mknod(0, 1, ve->veid);
+	if (err < 0) {
+		printk(KERN_WARNING "Can't create fairsched node %d\n",
+				ve->veid);
+		return err;
+	}
+	err = sys_fairsched_vcpus(ve->veid, vcpus);
+	if (err) {
+		printk(KERN_WARNING "Can't set fairsched vcpus on node %d\n",
+				ve->veid);
+		goto cleanup;
+	}
+	err = sys_fairsched_mvpr(current->pid, ve->veid);
+	if (err) {
+		printk(KERN_WARNING "Can't switch to fairsched node %d\n",
+				ve->veid);
+		goto cleanup;
+	}
+#endif
+	ve_sched_attach(ve);
+	return 0;
+
+#ifdef CONFIG_FAIRSCHED
+cleanup:
+	if (sys_fairsched_rmnod(ve->veid))
+		printk(KERN_ERR "Can't clean fairsched node %d\n",
+				ve->veid);
+	return err;
+#endif
+}
+
+/* Move the current task back to the init node and remove the VE's
+ * fairsched node (no-op without CONFIG_FAIRSCHED). */
+static void fini_ve_sched(struct ve_struct *ve)
+{
+#ifdef CONFIG_FAIRSCHED
+	if (task_vsched_id(current) == ve->veid)
+		if (sys_fairsched_mvpr(current->pid, fairsched_init_node.id))
+			printk(KERN_WARNING "Can't leave fairsched node %d\n",
+					ve->veid);
+	if (sys_fairsched_rmnod(ve->veid))
+		printk(KERN_ERR "Can't remove fairsched node %d\n",
+				ve->veid);
+#endif
+}
+
+/*
+ * Zero-initialize a freshly allocated ve_struct and fill in its identity
+ * (veid, class, features, init task), list heads, locks and timestamps.
+ * Takes the first reference on the VE.  Always returns 0.
+ */
+static int init_ve_struct(struct ve_struct *ve, envid_t veid,
+		u32 class_id, env_create_param_t *data,
+		struct task_struct *init_tsk)
+{
+	int n;
+
+	memset(ve, 0, sizeof(*ve));
+	(void)get_ve(ve);
+	ve->veid = veid;
+	ve->class_id = class_id;
+	ve->init_entry = init_tsk;
+	ve->features = data->feature_mask;
+	INIT_LIST_HEAD(&ve->vetask_lh);
+	INIT_LIST_HEAD(&ve->_simple_dev_list);
+	init_rwsem(&ve->op_sem);
+	ve->ifindex = -1;
+
+	for(n = 0; n < UIDHASH_SZ_VE; ++n)
+		INIT_LIST_HEAD(&ve->uidhash_table[n]);
+
+	ve->start_timespec = ve->init_entry->start_time;
+	/* The value is wrong, but it is never compared to process
+	 * start times */
+	ve->start_jiffies = get_jiffies_64();
+	ve->start_cycles = get_cycles();
+	ve->virt_pids = glob_virt_pids;
+
+	return 0;
+}
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * /proc/meminfo virtualization
+ *
+ **********************************************************************
+ **********************************************************************/
+/* Set the /proc/meminfo override value for VE @veid.
+ * Returns 0 or -ESRCH if no such VE exists. */
+static int ve_set_meminfo(envid_t veid, unsigned long val)
+{
+	struct ve_struct *ve;
+
+	ve = get_ve_by_id(veid);
+	if (!ve)
+		return -ESRCH;
+
+	ve->meminfo_val = val;
+	real_put_ve(ve);
+	return 0;
+}
+
+/* Start with no meminfo override (0). */
+static int init_ve_meminfo(struct ve_struct *ve)
+{
+	ve->meminfo_val = 0;
+	return 0;
+}
+
+/* Nothing to release for the meminfo override. */
+static inline void fini_ve_meminfo(struct ve_struct *ve)
+{
+}
+
+/* Record @tsk's current root (mnt + dentry) as the VE's filesystem root
+ * and mark that subtree virtual. */
+static void set_ve_root(struct ve_struct *ve, struct task_struct *tsk)
+{
+	read_lock(&tsk->fs->lock);
+	ve->fs_rootmnt = tsk->fs->rootmnt;
+	ve->fs_root = tsk->fs->root;
+	read_unlock(&tsk->fs->lock);
+	mark_tree_virtual(ve->fs_rootmnt, ve->fs_root);
+}
+
+/* Derive the VE's default capability set from @tsk's effective caps,
+ * always dropping CAP_SETVEID inside the VE. */
+static void set_ve_caps(struct ve_struct *ve, struct task_struct *tsk)
+{
+	/* required for real_setdevperms from register_ve_<fs> above */
+	memcpy(&ve->cap_default, &tsk->cap_effective, sizeof(kernel_cap_t));
+	cap_lower(ve->cap_default, CAP_SETVEID);
+}
+
+/* Insert @ve at the head of the global VE list.
+ * Returns 0, or -EEXIST if a VE with the same veid is already listed. */
+static int ve_list_add(struct ve_struct *ve)
+{
+	write_lock_irq(&ve_list_guard);
+	if (__find_ve_by_id(ve->veid) != NULL)
+		goto err_exists;
+
+	ve->prev = NULL;
+	ve->next = ve_list_head;
+	if (ve_list_head)
+		ve_list_head->prev = ve;
+	ve_list_head = ve;
+	nr_ve++;
+	write_unlock_irq(&ve_list_guard);
+	return 0;
+
+err_exists:
+	write_unlock_irq(&ve_list_guard);
+	return -EEXIST;
+}
+
+/* Unlink @ve from the global VE list (does not free it). */
+static void ve_list_del(struct ve_struct *ve)
+{
+	write_lock_irq(&ve_list_guard);
+	if (ve->prev)
+		ve->prev->next = ve->next;
+	else
+		ve_list_head = ve->next;
+	if (ve->next)
+		ve->next->prev = ve->prev;
+	nr_ve--;
+	write_unlock_irq(&ve_list_guard);
+}
+
+/* Restrict all three capability sets of @tsk to the VE's defaults. */
+static void set_task_ve_caps(struct task_struct *tsk, struct ve_struct *ve)
+{
+	spin_lock(&task_capability_lock);
+	cap_mask(tsk->cap_effective, ve->cap_default);
+	cap_mask(tsk->cap_inheritable, ve->cap_default);
+	cap_mask(tsk->cap_permitted, ve->cap_default);
+	spin_unlock(&task_capability_lock);
+}
+
+/*
+ * Move @tsk from VE @old into VE @new: drop its caps to the new VE's
+ * defaults, relink it into the new VE's task lists and fix both VEs'
+ * process counters and references.
+ */
+static void move_task(struct task_struct *tsk, struct ve_struct *new,
+		struct ve_struct *old)
+{
+	/* this prohibits ptracing of a task entered to VPS from host system */
+	tsk->mm->vps_dumpable = 0;
+	/* setup capabilities before enter */
+	set_task_ve_caps(tsk, new);
+
+	write_lock_irq(&tasklist_lock);
+	VE_TASK_INFO(tsk)->owner_env = new;
+	VE_TASK_INFO(tsk)->exec_env = new;
+	REMOVE_VE_LINKS(tsk);
+	SET_VE_LINKS(tsk);
+
+	atomic_dec(&old->pcounter);
+	atomic_inc(&new->pcounter);
+	real_put_ve(old);
+	get_ve(new);
+	write_unlock_irq(&tasklist_lock);
+}
+
+#if (defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)) && \
+	defined(CONFIG_NETFILTER) && defined(CONFIG_VE_IPTABLES)
+extern int init_netfilter(void);
+extern void fini_netfilter(void);
+#define init_ve_netfilter()	init_netfilter()
+#define fini_ve_netfilter()	fini_netfilter()
+#else
+#define init_ve_netfilter()	(0)
+#define fini_ve_netfilter()	do { } while (0)
+#endif
+
+/*
+ * Conditionally call the per-VE init symbol `name` from module `mod`:
+ * only when `full_mask` is requested in `mask` and the VE has not yet
+ * loaded this module.  On success the module's _MOD bit is recorded in
+ * ve->_iptables_modules; a return of 1 (symbol absent) is treated as
+ * success.  Expands to 0 or a negative errno.
+ */
+#define KSYMIPTINIT(mask, ve, full_mask, mod, name, args)	\
+({								\
+	int ret = 0;						\
+	if (VE_IPT_CMP(mask, full_mask) &&			\
+		VE_IPT_CMP((ve)->_iptables_modules, 		\
+			full_mask & ~(full_mask##_MOD))) {	\
+		ret = KSYMERRCALL(1, mod, name, args);		\
+		if (ret == 0)					\
+			(ve)->_iptables_modules |=		\
+					full_mask##_MOD;	\
+		if (ret == 1)					\
+			ret = 0;				\
+	}							\
+	ret;							\
+})
+
+/*
+ * Counterpart of KSYMIPTINIT for teardown: call the fini symbol only if
+ * the module's _MOD bit is present in `mask`; the call itself is void.
+ */
+#define KSYMIPTFINI(mask, full_mask, mod, name, args)		\
+({								\
+ 	if (VE_IPT_CMP(mask, full_mask##_MOD))			\
+		KSYMSAFECALL_VOID(mod, name, args);		\
+})
+
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+/*
+ * Initialize (@init_or_cleanup != 0) or tear down (== 0) the per-VE
+ * iptables sub-modules selected by @init_mask.
+ *
+ * The init path is a ladder of KSYMIPTINIT calls, one per compiled-in
+ * netfilter module; on the first failure it falls through into the
+ * cleanup ladder at the matching err_* label, which undoes everything
+ * initialized so far in exact reverse order.  The cleanup path entered
+ * via "goto cleanup" runs the full ladder, gated per-module by the
+ * _MOD bits recorded in ve->_iptables_modules.  Keep the two ladders
+ * mirror images of each other when adding modules.
+ */
+static int do_ve_iptables(struct ve_struct *ve, __u64 init_mask,
+		int init_or_cleanup)
+{
+	int err;
+
+	err = 0;
+	if (!init_or_cleanup)
+		goto cleanup;
+
+	/* init part */
+#if defined(CONFIG_IP_NF_IPTABLES) || \
+    defined(CONFIG_IP_NF_IPTABLES_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES,
+			ip_tables, init_iptables, ());
+	if (err < 0)
+		goto err_iptables;
+#endif
+#if defined(CONFIG_IP_NF_CONNTRACK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK,
+			ip_conntrack, init_iptable_conntrack, ());
+	if (err < 0)
+		goto err_iptable_conntrack;
+#endif
+#if defined(CONFIG_IP_NF_FTP) || \
+    defined(CONFIG_IP_NF_FTP_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_FTP,
+			ip_conntrack_ftp, init_iptable_ftp, ());
+	if (err < 0)
+		goto err_iptable_ftp;
+#endif
+#if defined(CONFIG_IP_NF_IRC) || \
+    defined(CONFIG_IP_NF_IRC_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_IRC,
+			ip_conntrack_irc, init_iptable_irc, ());
+	if (err < 0)
+		goto err_iptable_irc;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_CONNTRACK) || \
+    defined(CONFIG_IP_NF_MATCH_CONNTRACK_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_CONNTRACK,
+			ipt_conntrack, init_iptable_conntrack_match, ());
+	if (err < 0)
+		goto err_iptable_conntrack_match;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_STATE) || \
+    defined(CONFIG_IP_NF_MATCH_STATE_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_STATE,
+			ipt_state, init_iptable_state, ());
+	if (err < 0)
+		goto err_iptable_state;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_HELPER) || \
+    defined(CONFIG_IP_NF_MATCH_HELPER_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_HELPER,
+			ipt_helper, init_iptable_helper, ());
+	if (err < 0)
+		goto err_iptable_helper;
+#endif
+#if defined(CONFIG_IP_NF_NAT) || \
+    defined(CONFIG_IP_NF_NAT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT,
+			iptable_nat, init_iptable_nat, ());
+	if (err < 0)
+		goto err_iptable_nat;
+#endif
+#if defined(CONFIG_IP_NF_NAT_FTP) || \
+    defined(CONFIG_IP_NF_NAT_FTP_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_FTP,
+			ip_nat_ftp, init_iptable_nat_ftp, ());
+	if (err < 0)
+		goto err_iptable_nat_ftp;
+#endif
+#if defined(CONFIG_IP_NF_NAT_IRC) || \
+    defined(CONFIG_IP_NF_NAT_IRC_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_IRC,
+			ip_nat_irc, init_iptable_nat_irc, ());
+	if (err < 0)
+		goto err_iptable_nat_irc;
+#endif
+#if defined(CONFIG_IP_NF_FILTER) || \
+    defined(CONFIG_IP_NF_FILTER_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER,
+			iptable_filter,	init_iptable_filter, ());
+	if (err < 0)
+		goto err_iptable_filter;
+#endif
+#if defined(CONFIG_IP_NF_MANGLE) || \
+    defined(CONFIG_IP_NF_MANGLE_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE,
+			iptable_mangle,	init_iptable_mangle, ());
+	if (err < 0)
+		goto err_iptable_mangle;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_LIMIT) || \
+    defined(CONFIG_IP_NF_MATCH_LIMIT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LIMIT,
+			ipt_limit, init_iptable_limit, ());
+	if (err < 0)
+		goto err_iptable_limit;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \
+    defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_MULTIPORT,
+			ipt_multiport, init_iptable_multiport, ());
+	if (err < 0)
+		goto err_iptable_multiport;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_TOS) || \
+    defined(CONFIG_IP_NF_MATCH_TOS_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TOS,
+			ipt_tos, init_iptable_tos, ());
+	if (err < 0)
+		goto err_iptable_tos;
+#endif
+#if defined(CONFIG_IP_NF_TARGET_TOS) || \
+    defined(CONFIG_IP_NF_TARGET_TOS_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TOS,
+			ipt_TOS, init_iptable_TOS, ());
+	if (err < 0)
+		goto err_iptable_TOS;
+#endif
+#if defined(CONFIG_IP_NF_TARGET_REJECT) || \
+    defined(CONFIG_IP_NF_TARGET_REJECT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REJECT,
+			ipt_REJECT, init_iptable_REJECT, ());
+	if (err < 0)
+		goto err_iptable_REJECT;
+#endif
+#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \
+    defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TCPMSS,
+			ipt_TCPMSS, init_iptable_TCPMSS, ());
+	if (err < 0)
+		goto err_iptable_TCPMSS;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_TCPMSS) || \
+    defined(CONFIG_IP_NF_MATCH_TCPMSS_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TCPMSS,
+			ipt_tcpmss, init_iptable_tcpmss, ());
+	if (err < 0)
+		goto err_iptable_tcpmss;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_TTL) || \
+    defined(CONFIG_IP_NF_MATCH_TTL_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TTL,
+			ipt_ttl, init_iptable_ttl, ());
+	if (err < 0)
+		goto err_iptable_ttl;
+#endif
+#if defined(CONFIG_IP_NF_TARGET_LOG) || \
+    defined(CONFIG_IP_NF_TARGET_LOG_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_LOG,
+			ipt_LOG, init_iptable_LOG, ());
+	if (err < 0)
+		goto err_iptable_LOG;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_LENGTH) || \
+    defined(CONFIG_IP_NF_MATCH_LENGTH_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LENGTH,
+			ipt_length, init_iptable_length, ());
+	if (err < 0)
+		goto err_iptable_length;
+#endif
+#if defined(CONFIG_IP_NF_TARGET_REDIRECT) || \
+    defined(CONFIG_IP_NF_TARGET_REDIRECT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REDIRECT,
+			ipt_REDIRECT, init_iptable_REDIRECT, ());
+	if (err < 0)
+		goto err_iptable_REDIRECT;
+#endif
+#if defined(CONFIG_IP_NF_MATCH_OWNER) || \
+    defined(CONFIG_IP_NF_MATCH_OWNER_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_OWNER,
+			ipt_owner, init_iptable_owner, ());
+	if (err < 0)
+		goto err_iptable_owner;
+#endif
+	return 0;
+
+/* ------------------------------------------------------------------------- */
+
+/*
+ * Cleanup ladder: strictly the reverse of the init ladder above.
+ * Entered from the top ("goto cleanup") for a full teardown, or at an
+ * intermediate err_* label to unwind a partially-completed init.
+ */
+cleanup:
+#if defined(CONFIG_IP_NF_MATCH_OWNER) || \
+    defined(CONFIG_IP_NF_MATCH_OWNER_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_OWNER,
+			ipt_owner, fini_iptable_owner, ());
+err_iptable_owner:
+#endif
+#if defined(CONFIG_IP_NF_TARGET_REDIRECT) || \
+    defined(CONFIG_IP_NF_TARGET_REDIRECT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REDIRECT,
+			ipt_REDIRECT, fini_iptable_REDIRECT, ());
+err_iptable_REDIRECT:
+#endif
+#if defined(CONFIG_IP_NF_MATCH_LENGTH) || \
+    defined(CONFIG_IP_NF_MATCH_LENGTH_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LENGTH,
+			ipt_length, fini_iptable_length, ());
+err_iptable_length:
+#endif
+#if defined(CONFIG_IP_NF_TARGET_LOG) || \
+    defined(CONFIG_IP_NF_TARGET_LOG_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_LOG,
+			ipt_LOG, fini_iptable_LOG, ());
+err_iptable_LOG:
+#endif
+#if defined(CONFIG_IP_NF_MATCH_TTL) || \
+    defined(CONFIG_IP_NF_MATCH_TTL_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TTL,
+			ipt_ttl, fini_iptable_ttl, ());
+err_iptable_ttl:
+#endif
+#if defined(CONFIG_IP_NF_MATCH_TCPMSS) || \
+    defined(CONFIG_IP_NF_MATCH_TCPMSS_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TCPMSS,
+			ipt_tcpmss, fini_iptable_tcpmss, ());
+err_iptable_tcpmss:
+#endif
+#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \
+    defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TCPMSS,
+			ipt_TCPMSS, fini_iptable_TCPMSS, ());
+err_iptable_TCPMSS:
+#endif
+#if defined(CONFIG_IP_NF_TARGET_REJECT) || \
+    defined(CONFIG_IP_NF_TARGET_REJECT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REJECT,
+			ipt_REJECT, fini_iptable_REJECT, ());
+err_iptable_REJECT:
+#endif
+#if defined(CONFIG_IP_NF_TARGET_TOS) || \
+    defined(CONFIG_IP_NF_TARGET_TOS_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TOS,
+			ipt_TOS, fini_iptable_TOS, ());
+err_iptable_TOS:
+#endif
+#if defined(CONFIG_IP_NF_MATCH_TOS) || \
+    defined(CONFIG_IP_NF_MATCH_TOS_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TOS,
+			ipt_tos, fini_iptable_tos, ());
+err_iptable_tos:
+#endif
+#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \
+    defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_MULTIPORT,
+			ipt_multiport, fini_iptable_multiport, ());
+err_iptable_multiport:
+#endif
+#if defined(CONFIG_IP_NF_MATCH_LIMIT) || \
+    defined(CONFIG_IP_NF_MATCH_LIMIT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LIMIT,
+			ipt_limit, fini_iptable_limit, ());
+err_iptable_limit:
+#endif
+#if defined(CONFIG_IP_NF_MANGLE) || \
+    defined(CONFIG_IP_NF_MANGLE_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE,
+			iptable_mangle,	fini_iptable_mangle, ());
+err_iptable_mangle:
+#endif
+#if defined(CONFIG_IP_NF_FILTER) || \
+    defined(CONFIG_IP_NF_FILTER_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER,
+			iptable_filter,	fini_iptable_filter, ());
+err_iptable_filter:
+#endif
+#if defined(CONFIG_IP_NF_NAT_IRC) || \
+    defined(CONFIG_IP_NF_NAT_IRC_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_IRC,
+			ip_nat_irc, fini_iptable_nat_irc, ());
+err_iptable_nat_irc:
+#endif
+#if defined(CONFIG_IP_NF_NAT_FTP) || \
+    defined(CONFIG_IP_NF_NAT_FTP_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_FTP,
+			ip_nat_ftp, fini_iptable_nat_ftp, ());
+err_iptable_nat_ftp:
+#endif
+#if defined(CONFIG_IP_NF_NAT) || \
+    defined(CONFIG_IP_NF_NAT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT,
+			iptable_nat, fini_iptable_nat, ());
+err_iptable_nat:
+#endif
+#if defined(CONFIG_IP_NF_MATCH_HELPER) || \
+    defined(CONFIG_IP_NF_MATCH_HELPER_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_HELPER,
+			ipt_helper, fini_iptable_helper, ());
+err_iptable_helper:
+#endif
+#if defined(CONFIG_IP_NF_MATCH_STATE) || \
+    defined(CONFIG_IP_NF_MATCH_STATE_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_STATE,
+			ipt_state, fini_iptable_state, ());
+err_iptable_state:
+#endif
+#if defined(CONFIG_IP_NF_MATCH_CONNTRACK) || \
+    defined(CONFIG_IP_NF_MATCH_CONNTRACK_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_CONNTRACK,
+			ipt_conntrack, fini_iptable_conntrack_match, ());
+err_iptable_conntrack_match:
+#endif
+#if defined(CONFIG_IP_NF_IRC) || \
+    defined(CONFIG_IP_NF_IRC_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_IRC,
+			ip_conntrack_irc, fini_iptable_irc, ());
+err_iptable_irc:
+#endif
+#if defined(CONFIG_IP_NF_FTP) || \
+    defined(CONFIG_IP_NF_FTP_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_FTP,
+			ip_conntrack_ftp, fini_iptable_ftp, ());
+err_iptable_ftp:
+#endif
+#if defined(CONFIG_IP_NF_CONNTRACK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK,
+			ip_conntrack, fini_iptable_conntrack, ());
+err_iptable_conntrack:
+#endif
+#if defined(CONFIG_IP_NF_IPTABLES) || \
+    defined(CONFIG_IP_NF_IPTABLES_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES,
+			ip_tables, fini_iptables, ());
+err_iptables:
+#endif
+	/* nothing initialized remains; clear the per-VE module bitmask */
+	ve->_iptables_modules = 0;
+
+	return err;
+}
+#else
+/* No VE netdev support: per-VE iptables handling is a successful no-op. */
+#define do_ve_iptables(ve, initmask, init)	(0)
+#endif
+
+/* Initialize the iptables sub-modules selected by @init_mask for @ve. */
+static inline int init_ve_iptables(struct ve_struct *ve, __u64 init_mask)
+{
+	return do_ve_iptables(ve, init_mask, 1);
+}
+
+/* Tear down @ve's iptables sub-modules; the error code is deliberately ignored. */
+static inline void fini_ve_iptables(struct ve_struct *ve, __u64 init_mask)
+{
+	(void)do_ve_iptables(ve, init_mask, 0);
+}
+
+/*
+ * Flush the rules of all per-VE tables (mangle, filter, nat) before the
+ * tables themselves are destroyed, so no rule keeps a reference to an
+ * object freed during teardown.
+ */
+static void flush_ve_iptables(struct ve_struct *ve)
+{
+	/*
+	 * flush all rule tables first,
+	 * this helps us to avoid refs to freed objs
+	 */
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE, ip_tables,
+			ipt_flush_table, (ve->_ipt_mangle_table));
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER, ip_tables,
+			ipt_flush_table, (ve->_ve_ipt_filter_pf));
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT, ip_tables,
+			ipt_flush_table, (ve->_ip_conntrack->_ip_nat_table));
+}
+
+static struct list_head ve_hooks[VE_MAX_HOOKS];
+static DECLARE_RWSEM(ve_hook_sem);
+
+/*
+ * Register a VE lifecycle hook on its hooknum chain, keeping the chain
+ * sorted by ascending priority (insertion before the first entry with a
+ * higher priority).  Always returns 0.
+ */
+int ve_hook_register(struct ve_hook *vh)
+{
+	struct list_head *lh;
+	struct ve_hook *tmp;
+
+	down_write(&ve_hook_sem);
+	list_for_each(lh, &ve_hooks[vh->hooknum]) {
+		tmp = list_entry(lh, struct ve_hook, list);
+		if (vh->priority < tmp->priority)
+			break;
+	}
+	/* insert before the first higher-priority hook (or at the tail) */
+	list_add_tail(&vh->list, lh);
+	up_write(&ve_hook_sem);
+	return 0;
+}
+EXPORT_SYMBOL(ve_hook_register);
+
+/* Remove a previously registered VE hook from its chain. */
+void ve_hook_unregister(struct ve_hook *vh)
+{
+	down_write(&ve_hook_sem);
+	list_del(&vh->list);
+	up_write(&ve_hook_sem);
+}
+EXPORT_SYMBOL(ve_hook_unregister);
+
+/*
+ * Invoke every hook on the @hooknum chain in priority order.  If one
+ * fails, call ->undo on all hooks that already ran (in reverse order)
+ * and return the error.  Hooks whose owner module cannot be pinned via
+ * try_module_get are skipped in both directions.
+ */
+static int ve_hook_iterate(unsigned int hooknum, void *data)
+{
+	struct ve_hook *vh;
+	int err;
+
+	err = 0;
+	down_read(&ve_hook_sem);
+	list_for_each_entry(vh, &ve_hooks[hooknum], list) {
+		if (!try_module_get(vh->owner))
+			continue;
+		err = vh->hook(hooknum, data);
+		module_put(vh->owner);
+		if (err)
+			break;
+	}
+
+	if (err) {
+		/* unwind: undo hooks that ran, newest first */
+		list_for_each_entry_continue_reverse(vh,
+					&ve_hooks[hooknum], list) {
+			if (!try_module_get(vh->owner))
+				continue;
+			if (vh->undo)
+				vh->undo(hooknum, data);
+			module_put(vh->owner);
+		}
+	}
+	up_read(&ve_hook_sem);
+	return err;
+}
+
+/*
+ * Run all hooks on the @hooknum chain in reverse priority order,
+ * ignoring their return values — used on the teardown path.
+ */
+static void ve_hook_iterate_cleanup(unsigned int hooknum, void *data)
+{
+	struct ve_hook *vh;
+
+	down_read(&ve_hook_sem);
+	list_for_each_entry_reverse(vh, &ve_hooks[hooknum], list) {
+		if (!try_module_get(vh->owner))
+			continue;
+		(void)vh->hook(hooknum, data);
+		module_put(vh->owner);
+	}
+	up_read(&ve_hook_sem);
+}
+
+/*
+ * Return 1 if @tsk is the only member of both the process group and the
+ * session identified by its own pid, 0 otherwise.  Used to decide
+ * whether the task's pid can safely be virtualized.
+ */
+static int alone_in_pgrp(struct task_struct *tsk)
+{
+	struct task_struct *p;
+	int alone = 0;
+
+	read_lock(&tasklist_lock);
+	do_each_task_pid_all(tsk->pid, PIDTYPE_PGID, p) {
+		if (p != tsk)
+			goto out;
+	} while_each_task_pid_all(tsk->pid, PIDTYPE_PGID, p);
+	do_each_task_pid_all(tsk->pid, PIDTYPE_SID, p) {
+		if (p != tsk)
+			goto out;
+	} while_each_task_pid_all(tsk->pid, PIDTYPE_SID, p);
+	alone = 1;
+out:
+	read_unlock(&tasklist_lock);
+	return alone;
+}
+
+/*
+ * Create VE @veid and make the calling task its init process.
+ *
+ * After validating that current can become a VE init (thread-group
+ * leader, no controlling tty, not a leader of a non-empty pgrp/session),
+ * this allocates the ve_struct and runs a long ladder of per-subsystem
+ * init calls; any failure unwinds via the matching err_* label below,
+ * in exact reverse order.  On success the task is moved into the VE
+ * with virtual pid/tgid/pgid/sid 1 and the VE's veid is returned.
+ */
+static int do_env_create(envid_t veid, unsigned int flags, u32 class_id,
+			 env_create_param_t *data, int datalen)
+{
+	struct task_struct *tsk;
+	struct ve_struct *old;
+	struct ve_struct *old_exec;
+	struct ve_struct *ve;
+ 	__u64 init_mask;
+	int err;
+
+	tsk = current;
+	old = VE_TASK_INFO(tsk)->owner_env;
+
+	if (!thread_group_leader(tsk) || !thread_group_empty(tsk))
+		return -EINVAL;
+
+	if (tsk->signal->tty) {
+		printk("ERR: VE init has controlling terminal\n");
+		return -EINVAL;
+	}
+
+	/* if not already a session/group leader, make sure setsid() could work */
+	if (tsk->signal->pgrp != tsk->pid ||
+	    tsk->signal->session != tsk->pid) {
+		int may_setsid;
+		read_lock(&tasklist_lock);
+		may_setsid = (find_pid(PIDTYPE_PGID, tsk->pid) == NULL);
+		read_unlock(&tasklist_lock);
+		if (!may_setsid) {
+			printk("ERR: VE init is process group leader\n");
+			return -EINVAL;
+		}
+	}
+	/* Check that the process is not a leader of non-empty group/session.
+	 * If it is, we cannot virtualize its PID and must fail. */
+	if (!alone_in_pgrp(tsk)) {
+		printk("ERR: VE init is not alone in process group\n");
+		return -EINVAL;
+	}
+
+	VZTRACE("%s: veid=%d classid=%d pid=%d\n",
+		__FUNCTION__, veid, class_id, current->pid);
+
+	err = -ENOMEM;
+	ve = kmalloc(sizeof(struct ve_struct), GFP_KERNEL);
+	if (ve == NULL)
+		goto err_struct;
+
+	init_ve_struct(ve, veid, class_id, data, tsk);
+	__module_get(THIS_MODULE);
+	down_write(&ve->op_sem);
+	if (flags & VE_LOCK)
+		ve->is_locked = 1;
+	if ((err = ve_list_add(ve)) < 0)
+		goto err_exist;
+
+	/* this should be done before context switching */
+	if ((err = init_printk(ve)) < 0)
+		goto err_log_wait;
+
+	/* from here on we execute in the new VE's context */
+	old_exec = set_exec_env(ve);
+
+	if ((err = init_ve_sched(ve, data->total_vcpus)) < 0)
+		goto err_sched;
+
+	/* move user to VE */
+	if ((err = set_user(0, 0)) < 0)
+		goto err_set_user;
+
+	set_ve_root(ve, tsk);
+
+	if ((err = init_ve_utsname(ve)))
+		goto err_utsname;
+
+	if ((err = init_ve_mibs(ve)))
+		goto err_mibs;
+
+	if ((err = init_ve_proc(ve)))
+		goto err_proc;
+
+	if ((err = init_ve_sysctl(ve)))
+		goto err_sysctl;
+
+	if ((err = init_ve_sysfs(ve)))
+		goto err_sysfs;
+
+	if ((err = ve_arp_init(ve)) < 0)
+		goto err_dev;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	if ((err = ve_ndisc_init(ve)) < 0)
+		goto err_dev;
+#endif
+
+	if ((err = init_ve_netdev()))
+		goto err_dev;
+
+	if ((err = init_ve_tty_drivers(ve)) < 0)
+		goto err_tty;
+
+	if ((err = init_ve_shmem(ve)))
+		goto err_shmem;
+
+	if ((err = init_ve_devpts(ve)))
+		goto err_devpts;
+
+	if((err = init_ve_meminfo(ve)))
+		goto err_meminf;
+
+	/* init SYSV IPC variables */
+	if ((err = init_ve_ipc(ve)) < 0)
+		goto err_ipc;
+
+	set_ve_caps(ve, tsk);
+
+	/* It is safe to initialize netfilter here as routing initialization and
+	   interface setup will be done below. This means that NO skb can be
+	   passed inside. Den */
+	/* iptables ve initialization for non ve0;
+	   ve0 init is in module_init */
+	if ((err = init_ve_netfilter()) < 0)
+		goto err_netfilter;
+
+	init_mask = data ? data->iptables_mask : VE_IP_DEFAULT;
+	if ((err = init_ve_iptables(ve, init_mask)) < 0)
+		goto err_iptables;
+
+	if ((err = init_ve_route(ve)) < 0)
+		goto err_route;
+
+	/* reserve virtual pid 1 for the VE init process */
+	if ((err = alloc_vpid(tsk->pid, 1)) < 0)
+		goto err_vpid;
+
+	if ((err = ve_hook_iterate(VE_HOOK_INIT, (void *)ve)) < 0)
+		goto err_ve_hook;
+
+	/* finally: set vpids and move inside */
+	move_task(tsk, ve, old);
+
+	set_virt_pid(tsk, 1);
+	set_virt_tgid(tsk, 1);
+
+	set_special_pids(tsk->pid, tsk->pid);
+	current->signal->tty_old_pgrp = 0;
+	set_virt_pgid(tsk, 1);
+	set_virt_sid(tsk, 1);
+
+	ve->is_running = 1;
+	up_write(&ve->op_sem);
+
+	printk(KERN_INFO "VPS: %d: started\n", veid);
+	return veid;
+
+	/* error unwinding — reverse order of the init ladder above */
+err_ve_hook:
+	free_vpid(1, ve);
+err_vpid:
+	fini_venet(ve);
+	fini_ve_route(ve);
+err_route:
+	fini_ve_iptables(ve, init_mask);
+err_iptables:
+	fini_ve_netfilter();
+err_netfilter:
+	fini_ve_ipc(ve);
+err_ipc:
+	fini_ve_meminfo(ve);
+err_meminf:
+	fini_ve_devpts(ve);
+err_devpts:
+	fini_ve_shmem(ve);
+err_shmem:
+	fini_ve_tty_drivers(ve);
+err_tty:
+	fini_ve_netdev();
+err_dev:
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	ve_ndisc_fini(ve);
+#endif
+	ve_arp_fini(ve);
+	fini_ve_sysfs(ve);
+err_sysfs:
+	fini_ve_sysctl(ve);
+err_sysctl:
+	fini_ve_proc(ve);
+err_proc:
+	do_clean_devperms(ve->veid); /* register procfs adds devperms */
+	fini_ve_mibs(ve);
+err_mibs:
+	/* free_ve_utsname() is called inside real_put_ve() */ ;
+err_utsname:
+	/* It is safe to restore current->envid here because
+	 * ve_fairsched_detach does not use current->envid. */
+	/* Really fairsched code uses current->envid in sys_fairsched_mknod 
+	 * only.  It is correct if sys_fairsched_mknod is called from
+	 * userspace.  If sys_fairsched_mknod is called from
+	 * ve_fairsched_attach, then node->envid and node->parent_node->envid
+	 * are explicitly set to valid value after the call. */
+	/* FIXME */
+	VE_TASK_INFO(tsk)->owner_env = old;
+	VE_TASK_INFO(tsk)->exec_env = old_exec;
+	/* move user back */
+	if (set_user(0, 0) < 0)
+		printk(KERN_WARNING"Can't restore UID\n");
+
+err_set_user:
+	fini_ve_sched(ve);
+err_sched:
+	(void)set_exec_env(old_exec);
+
+	/* we can jump here having incorrect envid */
+	VE_TASK_INFO(tsk)->owner_env = old;
+	fini_printk(ve);
+err_log_wait:
+	ve_list_del(ve);
+	up_write(&ve->op_sem);
+
+	real_put_ve(ve);
+err_struct:
+	printk(KERN_INFO "VPS: %d: failed to start with err=%d\n", veid, err);
+	return err;
+
+	/* ve_list_add failed: ve was never on the list, plain kfree suffices */
+err_exist:
+	kfree(ve);
+	goto err_struct;
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE start/stop callbacks
+ *
+ **********************************************************************
+ **********************************************************************/
+
+/*
+ * Entry point for the env_create syscall/ioctl: dispatch on @flags to
+ * query the current VE (flags == 0), test for existence (VE_TEST),
+ * create a new VE (VE_CREATE, downgraded to VE_ENTER when the VE is
+ * already running), or enter an existing one (VE_ENTER).  Requires
+ * CAP_SETVEID for anything but the query.  Returns the veid on success
+ * or a negative errno.
+ */
+int real_env_create(envid_t veid, unsigned flags, u32 class_id,
+			env_create_param_t *data, int datalen)
+{
+	int status;
+	struct ve_struct *ve;
+
+	if (!flags) {
+		status = get_exec_env()->veid;
+		goto out;
+	}
+
+	status = -EPERM;
+	if (!capable(CAP_SETVEID))
+		goto out;
+
+	status = -EINVAL;
+	if ((flags & VE_TEST) && (flags & (VE_ENTER|VE_CREATE)))
+		goto out;
+
+	status = -EINVAL;
+	ve = get_ve_by_id(veid);
+	if (ve) {
+		if (flags & VE_TEST) {
+			status = 0;
+			goto out_put;
+		}
+		if (flags & VE_EXCLUSIVE) {
+			status = -EACCES;
+			goto out_put;
+		}
+		if (flags & VE_CREATE) {
+			/* VE already running: treat create as enter */
+			flags &= ~VE_CREATE;
+			flags |= VE_ENTER;
+		}
+	} else {
+		if (flags & (VE_TEST|VE_ENTER)) {
+			status = -ESRCH;
+			goto out;
+		}
+	}
+
+	if (flags & VE_CREATE) {
+		status = do_env_create(veid, flags, class_id, data, datalen);
+		goto out;
+	} else if (flags & VE_ENTER)
+		status = do_env_enter(ve, flags);
+
+	/* else: returning EINVAL */
+	/* NOTE(review): with unknown flags and no such VE, this falls
+	 * through with ve == NULL — presumably real_put_ve() tolerates
+	 * NULL; confirm against its definition. */
+
+out_put:
+	real_put_ve(ve);
+out:
+	return status;
+}
+
+/*
+ * Move the calling task into already-running VE @ve.  Fails if the VE
+ * is not running, is locked (unless VE_SKIPLOCK), or the caller is not
+ * a single-threaded group leader.  Unlike VE_CREATE, no setsid() is
+ * performed and pid virtualization is best-effort.  Returns the veid
+ * of the entered VE on success.
+ */
+static int do_env_enter(struct ve_struct *ve, unsigned int flags)
+{
+	struct task_struct *tsk = current;
+	int err;
+
+	VZTRACE("%s: veid=%d\n", __FUNCTION__, ve->veid);
+
+	err = -EBUSY;
+	down_read(&ve->op_sem);
+	if (!ve->is_running)
+		goto out_up;
+	if (ve->is_locked && !(flags & VE_SKIPLOCK))
+		goto out_up;
+	err = -EINVAL;
+	if (!thread_group_leader(tsk) || !thread_group_empty(tsk))
+		goto out_up;
+
+#ifdef CONFIG_FAIRSCHED
+	/* move the task into the VE's fairsched node first */
+	err = sys_fairsched_mvpr(current->pid, ve->veid);
+	if (err)
+		goto out_up;
+#endif
+
+	ve_sched_attach(ve);
+	move_task(current, ve, VE_TASK_INFO(tsk)->owner_env);
+
+	/* Check that the process is not a leader of non-empty group/session.
+	 * If it is, we cannot virtualize its PID. Do not fail, just leave
+	 * it non-virtual.
+	 */
+	if (!is_virtual_pid(virt_pid(tsk)) && alone_in_pgrp(tsk)) {
+		pid_t vpid = alloc_vpid(tsk->pid, -1);
+		if (vpid > 0) {
+			set_virt_pid(tsk, vpid);
+			set_virt_tgid(tsk, vpid);
+			if (tsk->signal->pgrp == tsk->pid)
+				set_virt_pgid(tsk, vpid);
+			if (tsk->signal->session == tsk->pid)
+				set_virt_sid(tsk, vpid);
+		}
+	}
+	/* Unlike VE_CREATE, we do not setsid() in VE_ENTER.
+	 * Process is allowed to be in an external group/session.
+	 * If user space callers wants, it will do setsid() after
+	 * VE_ENTER.
+	 */
+	err = VE_TASK_INFO(tsk)->owner_env->veid;
+
+out_up:
+	up_read(&ve->op_sem);
+	return err;
+}
+
+/*
+ * Tear down all per-VE subsystems for a stopped VE, in roughly the
+ * reverse order of do_env_create: networking first (so no skb can be
+ * in flight when iptables/conntrack go away), then IPC, scheduler,
+ * filesystems, proc/sysctl and logging; finally drop the list entry
+ * and the reference taken at creation.
+ */
+static void env_cleanup(struct ve_struct *ve)
+{
+	struct ve_struct *old_ve;
+
+	VZTRACE("real_do_env_cleanup\n");
+
+	down_read(&ve->op_sem);
+	old_ve = set_exec_env(ve);
+
+	ve_hook_iterate_cleanup(VE_HOOK_FINI, (void *)ve);
+
+	fini_venet(ve);
+
+	/* no new packets in flight beyond this point */
+	synchronize_net();
+	/* skb hold dst_entry, and in turn lies in the ip fragment queue */
+	ip_fragment_cleanup(ve);
+
+	fini_ve_netdev();
+	fini_ve_route(ve);
+	ve_arp_fini(ve);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	ve_ndisc_fini(ve);
+#endif
+
+	/* kill iptables */
+	/* No skb belonging to VE can exist at this point as unregister_netdev
+	   is an operation awaiting until ALL skb's gone */
+	flush_ve_iptables(ve);
+	fini_ve_iptables(ve, ve->_iptables_modules);
+	fini_ve_netfilter();
+
+	ve_ipc_cleanup();
+
+	fini_ve_sched(ve);
+	do_clean_devperms(ve->veid);
+
+	fini_ve_devpts(ve);
+	fini_ve_shmem(ve);
+	fini_ve_sysfs(ve);
+	unregister_ve_tty_drivers(ve);
+	fini_ve_sysctl(ve);
+	fini_ve_proc(ve);
+	fini_ve_meminfo(ve);
+
+	fini_ve_mibs(ve);
+
+	(void)set_exec_env(old_ve);
+	fini_printk(ve);	/* no printk can happen in ve context anymore */
+
+	ve_list_del(ve);
+	up_read(&ve->op_sem);
+
+	real_put_ve(ve);
+}
+
+static struct list_head ve_cleanup_list;
+static spinlock_t ve_cleanup_lock;
+
+static DECLARE_COMPLETION(vzmond_complete);
+static struct task_struct *vzmond_thread;
+static volatile int stop_vzmond;
+
+/*
+ * Queue @ve for asynchronous cleanup and kick the vzmond thread, which
+ * performs the actual env_cleanup() from process context.
+ */
+void real_do_env_cleanup(struct ve_struct *ve)
+{
+	spin_lock(&ve_cleanup_lock);
+	list_add_tail(&ve->cleanup_list, &ve_cleanup_list);
+	spin_unlock(&ve_cleanup_lock);
+	wake_up_process(vzmond_thread);
+}
+
+/*
+ * Kernel-thread body: daemonize as "vzmond/<veid>", clean up one VE and
+ * drop the module reference taken by the spawner before exiting.
+ */
+static int vzmond_helper(void *arg)
+{
+	char name[18];
+	struct ve_struct *ve;
+
+	ve = (struct ve_struct *)arg;
+	snprintf(name, sizeof(name), "vzmond/%d", ve->veid);
+	daemonize(name);
+	env_cleanup(ve);
+	module_put_and_exit(0);
+}
+
+/*
+ * Drain the cleanup queue: for each pending VE spawn a vzmond_helper
+ * kernel thread (falling back to synchronous env_cleanup() if thread
+ * creation fails).  Bails out early when resched is needed; the caller
+ * loop in vzmond() will come back for the rest.
+ */
+static void do_pending_env_cleanups(void)
+{
+	int err;
+	struct ve_struct *ve;
+
+	spin_lock(&ve_cleanup_lock);
+	while (1) {
+		if (list_empty(&ve_cleanup_list) || need_resched())
+			break;
+
+		ve = list_first_entry(&ve_cleanup_list, struct ve_struct,
+				cleanup_list);
+		list_del(&ve->cleanup_list);
+		spin_unlock(&ve_cleanup_lock);
+
+		/* reference is dropped by the helper via module_put_and_exit */
+		__module_get(THIS_MODULE);
+		err = kernel_thread(vzmond_helper, (void *)ve, 0);
+		if (err < 0) {
+			env_cleanup(ve);
+			module_put(THIS_MODULE);
+		}
+
+		spin_lock(&ve_cleanup_lock);
+	}
+	spin_unlock(&ve_cleanup_lock);
+}
+
+/* Nonzero when at least one VE is still queued for cleanup. */
+static inline int have_pending_cleanups(void)
+{
+	return !list_empty(&ve_cleanup_list);
+}
+
+/*
+ * Main loop of the vzmond kernel thread: sleep until woken, then drain
+ * pending VE cleanups.  Exits only when stop_vzmond is set AND the
+ * queue is empty, signalling vzmond_complete for fini_vzmond().
+ */
+static int vzmond(void *arg)
+{
+	daemonize("vzmond");
+	vzmond_thread = current;
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	while (!stop_vzmond || have_pending_cleanups()) {
+		schedule();
+		if (signal_pending(current))
+			flush_signals(current);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
+
+		do_pending_env_cleanups();
+		set_current_state(TASK_INTERRUPTIBLE);
+		/* stay runnable if work remains (e.g. we bailed on resched) */
+		if (have_pending_cleanups())
+			__set_current_state(TASK_RUNNING);
+	}
+
+	__set_task_state(current, TASK_RUNNING);
+	complete_and_exit(&vzmond_complete, 0);
+}
+
+/* Initialize the cleanup queue and start the vzmond kernel thread. */
+static int __init init_vzmond(void)
+{
+	INIT_LIST_HEAD(&ve_cleanup_list);
+	spin_lock_init(&ve_cleanup_lock);
+	stop_vzmond = 0;
+	return kernel_thread(vzmond, NULL, 0);
+}
+
+/* Ask vzmond to stop and wait until it has drained the queue and exited. */
+static void fini_vzmond(void)
+{
+	stop_vzmond = 1;
+	wake_up_process(vzmond_thread);
+	wait_for_completion(&vzmond_complete);
+	WARN_ON(!list_empty(&ve_cleanup_list));
+}
+
+/*
+ * Final destructor, run when the last reference to @ve is dropped:
+ * free the remaining per-VE resources and the ve_struct itself, then
+ * release the module reference taken in do_env_create().
+ */
+void real_do_env_free(struct ve_struct *ve)
+{
+	VZTRACE("real_do_env_free\n");
+
+	ve_ipc_free(ve); /* free SYSV IPC resources */
+	free_ve_tty_drivers(ve);
+	free_ve_utsname(ve);
+	free_ve_sysctl(ve); /* free per ve sysctl data */
+	free_ve_filesystems(ve);
+	printk(KERN_INFO "VPS: %d: stopped\n", VEID(ve));
+	kfree(ve);
+
+	module_put(THIS_MODULE);
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE TTY handling
+ *
+ **********************************************************************
+ **********************************************************************/
+
+DCL_VE_OWNER(TTYDRV, TAIL_SOFT, struct tty_driver, owner_env, , ())
+
+/*
+ * Clone tty driver @base for use inside @ve: copy the driver struct,
+ * allocate fresh per-VE ttys/termios/termios_locked arrays (one kmalloc
+ * split three ways) unless the driver uses devpts-managed memory, and
+ * tag the copy with the owning VE.  Returns NULL on allocation failure.
+ */
+static struct tty_driver *alloc_ve_tty_driver(struct tty_driver *base,
+					   struct ve_struct *ve)
+{
+	size_t size;
+	struct tty_driver *driver;
+
+	driver = kmalloc(sizeof(struct tty_driver), GFP_KERNEL_UBC);
+	if (!driver)
+		goto out;
+
+	memcpy(driver, base, sizeof(struct tty_driver));
+
+	driver->driver_state = NULL;
+
+	/* one array each for ttys, termios and termios_locked */
+	size = base->num * 3 * sizeof(void *);
+	if (!(driver->flags & TTY_DRIVER_DEVPTS_MEM)) {
+		void **p;
+		p = kmalloc(size, GFP_KERNEL_UBC);
+		if (!p)
+			goto out_free;
+		memset(p, 0, size);
+		driver->ttys = (struct tty_struct **)p;
+		driver->termios = (struct termios **)(p + driver->num);
+		driver->termios_locked = (struct termios **)(p + driver->num * 2);
+	} else {
+		driver->ttys = NULL;
+		driver->termios = NULL;
+		driver->termios_locked = NULL;
+	}
+
+	SET_VE_OWNER_TTYDRV(driver, ve);
+	driver->flags |= TTY_DRIVER_INSTALLED;
+	driver->refcount = 0;
+
+	return driver;
+
+out_free:
+	kfree(driver);
+out:
+	return NULL;
+}
+
+/*
+ * Free a per-VE tty driver clone: release saved termios, the combined
+ * ttys/termios array (kfree(driver->ttys) frees all three — they share
+ * one allocation, see alloc_ve_tty_driver) and the driver itself.
+ * NULL-safe.
+ */
+static void free_ve_tty_driver(struct tty_driver *driver)
+{
+	if (!driver)
+		return;
+
+	clear_termios(driver);
+	kfree(driver->ttys);
+	kfree(driver);
+}
+
+/*
+ * Allocate per-VE clones of the pty drivers: legacy BSD master/slave
+ * pair and/or Unix98 ptm/pts pair plus their pty-number idr, depending
+ * on kernel config.  On any failure everything allocated so far is
+ * freed and -ENOMEM is returned.
+ */
+static int alloc_ve_tty_drivers(struct ve_struct* ve)
+{
+#ifdef CONFIG_LEGACY_PTYS
+	extern struct tty_driver *pty_driver;
+	extern struct tty_driver *pty_slave_driver;
+
+	/* Traditional BSD devices */
+	ve->pty_driver = alloc_ve_tty_driver(pty_driver, ve);
+	if (!ve->pty_driver)
+		goto out_mem;
+
+	ve->pty_slave_driver = alloc_ve_tty_driver(pty_slave_driver, ve);
+	if (!ve->pty_slave_driver)
+		goto out_mem;
+
+	/* link master and slave halves to each other */
+	ve->pty_driver->other       = ve->pty_slave_driver;
+	ve->pty_slave_driver->other = ve->pty_driver;
+#endif	
+
+#ifdef CONFIG_UNIX98_PTYS
+	ve->ptm_driver = alloc_ve_tty_driver(ptm_driver, ve);
+	if (!ve->ptm_driver)
+		goto out_mem;
+
+	ve->pts_driver = alloc_ve_tty_driver(pts_driver, ve);
+	if (!ve->pts_driver)
+		goto out_mem;
+
+	ve->ptm_driver->other = ve->pts_driver;
+	ve->pts_driver->other = ve->ptm_driver;
+
+	/* per-VE idr for allocating Unix98 pty indices */
+	ve->allocated_ptys = kmalloc(sizeof(*ve->allocated_ptys),
+					GFP_KERNEL_UBC);
+	if (!ve->allocated_ptys)
+		goto out_mem;
+	idr_init(ve->allocated_ptys);
+#endif
+	return 0;
+
+out_mem:
+	free_ve_tty_drivers(ve);
+	return -ENOMEM;
+}
+
+/*
+ * Free all per-VE pty driver clones and reset the pointers; safe to
+ * call on a partially-initialized ve (free_ve_tty_driver is NULL-safe).
+ */
+static void free_ve_tty_drivers(struct ve_struct* ve)
+{
+#ifdef CONFIG_LEGACY_PTYS
+	free_ve_tty_driver(ve->pty_driver);
+	free_ve_tty_driver(ve->pty_slave_driver);
+	ve->pty_driver = ve->pty_slave_driver = NULL;
+#endif	
+#ifdef CONFIG_UNIX98_PTYS
+	free_ve_tty_driver(ve->ptm_driver);
+	free_ve_tty_driver(ve->pts_driver);
+	kfree(ve->allocated_ptys);
+	ve->ptm_driver = ve->pts_driver = NULL;
+	ve->allocated_ptys = NULL;
+#endif
+}
+
+/* Add @driver to the global tty_drivers list; caller holds tty_driver_guard. */
+static inline void __register_tty_driver(struct tty_driver *driver)
+{
+	list_add(&driver->tty_drivers, &tty_drivers);
+}
+
+/* Remove @driver from the tty_drivers list; NULL-safe, caller holds tty_driver_guard. */
+static inline void __unregister_tty_driver(struct tty_driver *driver)
+{
+	if (!driver)
+		return;
+	list_del(&driver->tty_drivers);
+}
+
+/* Register the VE's pty driver clones on the global tty list.  Always 0. */
+static int register_ve_tty_drivers(struct ve_struct* ve)
+{
+	write_lock_irq(&tty_driver_guard);
+#ifdef CONFIG_UNIX98_PTYS
+	__register_tty_driver(ve->ptm_driver);
+	__register_tty_driver(ve->pts_driver);
+#endif
+#ifdef CONFIG_LEGACY_PTYS
+	__register_tty_driver(ve->pty_driver);
+	__register_tty_driver(ve->pty_slave_driver);
+#endif	
+	write_unlock_irq(&tty_driver_guard);
+
+	return 0;
+}
+
+/*
+ * Remove the VE's pty driver clones from the global tty list.
+ * NOTE(review): the legacy pty_driver/pty_slave_driver lines are not
+ * wrapped in #ifdef CONFIG_LEGACY_PTYS, unlike register_ve_tty_drivers
+ * above — presumably the fields exist unconditionally and stay NULL
+ * (handled by the NULL check in __unregister_tty_driver); confirm
+ * against the ve_struct definition.
+ */
+static void unregister_ve_tty_drivers(struct ve_struct* ve)
+{
+	VZTRACE("unregister_ve_tty_drivers\n");
+
+	write_lock_irq(&tty_driver_guard);
+	__unregister_tty_driver(ve->pty_driver);
+	__unregister_tty_driver(ve->pty_slave_driver);
+#ifdef CONFIG_UNIX98_PTYS
+	__unregister_tty_driver(ve->ptm_driver);
+	__unregister_tty_driver(ve->pts_driver);
+#endif
+	write_unlock_irq(&tty_driver_guard);
+}
+
+/* Allocate and register the VE's tty drivers; unwinds allocation on failure. */
+static int init_ve_tty_drivers(struct ve_struct *ve)
+{
+	int err;
+
+	if ((err = alloc_ve_tty_drivers(ve)))
+		goto err_ttyalloc;
+	if ((err = register_ve_tty_drivers(ve)))
+		goto err_ttyreg;
+	return 0;
+
+err_ttyreg:
+	free_ve_tty_drivers(ve);
+err_ttyalloc:
+	return err;
+}
+
+/* Unregister and free the VE's tty drivers (reverse of init_ve_tty_drivers). */
+static void fini_ve_tty_drivers(struct ve_struct *ve)
+{
+	unregister_ve_tty_drivers(ve);
+	free_ve_tty_drivers(ve);
+}
+
+/*
+ * Free the termios and termios_locked structures because
+ * we don't want to get memory leaks when modular tty
+ * drivers are removed from the kernel.
+ */
+/*
+ * Free the termios and termios_locked structures because
+ * we don't want to get memory leaks when modular tty
+ * drivers are removed from the kernel.
+ */
+static void clear_termios(struct tty_driver *driver)
+{
+	int i;
+	struct termios *tp;
+
+	if (driver->termios == NULL)
+		return;
+	/* clear each slot before freeing so no stale pointer survives */
+	for (i = 0; i < driver->num; i++) {
+		tp = driver->termios[i];
+		if (tp) {
+			driver->termios[i] = NULL;
+			kfree(tp);
+		}
+		tp = driver->termios_locked[i];
+		if (tp) {
+			driver->termios_locked[i] = NULL;
+			kfree(tp);
+		}
+	}
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * Pieces of VE network
+ *
+ **********************************************************************
+ **********************************************************************/
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#include <asm/uaccess.h>
+#include <net/sock.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
+#endif
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+/*
+ * Remove every IPv4 address configured on @dev.  No-op when the device
+ * has no inet data.
+ */
+static void ve_del_ip_addrs(struct net_device *dev)
+{
+	struct in_device *in_dev;
+
+	in_dev = in_dev_get(dev);
+	if (in_dev == NULL)
+		return;
+
+	/* inet_del_ifa unlinks the head each time, so this terminates */
+	while (in_dev->ifa_list != NULL) {
+		inet_del_ifa(in_dev, &in_dev->ifa_list, 1);
+	}
+	in_dev_put(in_dev);
+}
+
+/*
+ * Quiesce @dev before moving it between VEs: drop its IP addresses,
+ * bring it down if up, then shut down queues, discard multicast state
+ * and free the divert block.  The @to_ve flag is currently unused here
+ * (direction does not change the cleanup).  Returns dev_close()'s
+ * result, or 0 if the device was already down.
+ */
+static int ve_netdev_cleanup(struct net_device *dev, int to_ve)
+{
+	int err;
+
+	err = 0;
+	ve_del_ip_addrs(dev);
+	if ((dev->flags & IFF_UP) != 0)
+		err = dev_close(dev);
+	synchronize_net();
+	dev_shutdown(dev);
+	dev_mc_discard(dev);
+	free_divert_blk(dev);
+	synchronize_net();
+	return err;
+}
+
+/*
+ * Move @dev from @ve_src to @ve_dst: unlink it from the source VE's
+ * device list and name/index hashes, append it to the destination's,
+ * retarget the device's exec beancounter to @exec_ub, and emit
+ * NETDEV_UNREGISTER in the source VE followed by NETDEV_REGISTER in
+ * the destination.  Called with dev_base_lock write-held; the lock is
+ * temporarily dropped around the notifier calls.
+ */
+static void __ve_dev_move(struct net_device *dev, struct ve_struct *ve_src,
+	struct ve_struct *ve_dst, struct user_beancounter *exec_ub)
+{
+	struct net_device **dp, *d;
+	struct user_beancounter *ub;
+	struct ve_struct *exec_ve;
+
+	/* unlink dev from ve_src's singly-linked device list and hashes */
+	for (d = ve_src->_net_dev_base, dp = NULL; d != NULL; 
+	     dp = &d->next, d = d->next) {
+		if (d == dev) {
+			hlist_del(&dev->name_hlist);
+			hlist_del(&dev->index_hlist);
+			if (ve_src->_net_dev_tail == &dev->next)
+				ve_src->_net_dev_tail = dp;
+			if (dp)
+				*dp = dev->next;
+			dev->next = NULL;
+			break;
+		}
+	}
+
+	/* append to ve_dst's list and re-hash under the new VE */
+	*ve_dst->_net_dev_tail = dev;
+	ve_dst->_net_dev_tail = &dev->next;
+	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name, ve_dst));
+	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, ve_dst));
+	dev->owner_env = ve_dst;
+
+	/* swap the device's exec beancounter reference */
+	ub = netdev_bc(dev)->exec_ub;
+	netdev_bc(dev)->exec_ub = get_beancounter(exec_ub);
+	put_beancounter(ub);
+
+	/* notifiers must run in the respective VE contexts, without the lock */
+	write_unlock_bh(&dev_base_lock);
+	exec_ve = set_exec_env(ve_src);
+	netdevice_notify(NETDEV_UNREGISTER, dev);
+	(void)set_exec_env(ve_dst);
+	netdevice_notify(NETDEV_REGISTER, dev);
+	(void)set_exec_env(exec_ve);
+	write_lock_bh(&dev_base_lock);
+}
+
+/*
+ * Move host (VE0) device @dev_name into VE @veid.
+ * Returns 0 on success, -ESRCH if the VE or device is missing,
+ * -EPERM if the device type is not movable, -EINVAL for bond
+ * slaves/masters, -EEXIST on a name clash inside the target VE.
+ */
+static int ve_dev_add(envid_t veid, char *dev_name)
+{
+	int err;
+	struct net_device *dev;
+	struct ve_struct *ve;
+	struct hlist_node *p;
+	struct hlist_head *head;
+
+	dev = NULL;
+	err = -ESRCH;
+
+	ve = get_ve_by_id(veid);
+	if (ve == NULL)
+		goto out;
+
+	rtnl_lock();
+
+	/* Look the device up in VE0's name hash. */
+	read_lock(&dev_base_lock);
+	hlist_for_each(p, dev_name_hash(dev_name, get_ve0())) {
+		struct net_device *d = hlist_entry(p, struct net_device, 
+						   name_hlist);
+		if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
+			dev = d;
+			break;
+		}
+	}
+	read_unlock(&dev_base_lock);
+	if (dev == NULL)
+		goto out_unlock;
+
+	err = -EPERM;
+	if (!ve_is_dev_movable(dev))
+		goto out_unlock;
+
+	/* Bonding slaves/masters cannot be moved independently. */
+	err = -EINVAL;
+	if (dev->flags & (IFF_SLAVE|IFF_MASTER))
+		goto out_unlock;
+
+	/* Check for existence of name */
+	head = dev_name_hash(dev->name, ve);
+	hlist_for_each(p, head) {
+		struct net_device *d
+			= hlist_entry(p, struct net_device, name_hlist);
+		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
+			err = -EEXIST;
+ 			goto out_unlock;
+		}
+ 	}
+
+	/* Quiesce the device, then relink it under the target VE. */
+	ve_netdev_cleanup(dev, 1);
+
+	write_lock_bh(&dev_base_lock);
+	__ve_dev_move(dev, get_ve0(), ve, get_exec_ub());
+	write_unlock_bh(&dev_base_lock);
+
+	err = 0;
+
+out_unlock:
+	rtnl_unlock();
+	real_put_ve(ve);
+
+	if (dev == NULL)
+		printk(KERN_WARNING "Device %s not found\n", dev_name);
+
+out:
+	return err;
+}
+
+/*
+ * Move device @dev_name out of VE @veid back to the host (VE0).
+ * Mirror of ve_dev_add(); the cleanup runs with the exec env switched
+ * to the owning VE so per-VE state is torn down in the right context.
+ */
+static int ve_dev_del(envid_t veid, char *dev_name)
+{
+	int err;
+	struct net_device *dev;
+	struct ve_struct *ve, *old_exec;
+	struct hlist_node *p;
+
+	dev = NULL;
+	err = -ESRCH;
+
+	ve = get_ve_by_id(veid);
+	if (ve == NULL)
+		goto out;
+
+	rtnl_lock();
+
+	/* Look the device up in the VE's own name hash. */
+	read_lock(&dev_base_lock);
+	hlist_for_each(p, dev_name_hash(dev_name, ve)) {
+		struct net_device *d = hlist_entry(p, struct net_device, 
+						   name_hlist);
+		if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
+			dev = d;
+			break;
+		}
+	}
+	read_unlock(&dev_base_lock);
+	if (dev == NULL)
+		goto out_unlock;
+
+	err = -EPERM;
+	if (!ve_is_dev_movable(dev))
+		goto out_unlock;
+
+	/* Tear down VE-side state in the VE's execution context. */
+	old_exec = set_exec_env(ve);
+	ve_netdev_cleanup(dev, 0);
+	(void)set_exec_env(old_exec);
+
+	/* Recharge back to the original owner beancounter. */
+	write_lock_bh(&dev_base_lock);
+	__ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
+	write_unlock_bh(&dev_base_lock);
+
+	err = 0;
+
+out_unlock:
+	rtnl_unlock();
+	real_put_ve(ve);
+
+	if (dev == NULL)
+		printk(KERN_WARNING "Device %s not found\n", dev_name);
+
+out:
+	return err;
+}
+
+/*
+ * Entry point for the VZCTL_VE_NETDEV ioctl: dispatch an add/del of a
+ * network device into/out of VE @veid.  Requires CAP_SETVEID.
+ */
+int real_ve_dev_map(envid_t veid, int op, char *dev_name)
+{
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	switch (op) {
+	case VE_NETDEV_ADD:
+		return ve_dev_add(veid, dev_name);
+	case VE_NETDEV_DEL:
+		return ve_dev_del(veid, dev_name);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * On VE shutdown, hand every mapped physical device back to VE0.
+ * VENET devices and the per-VE loopback are skipped.  The list is
+ * re-walked from the head after each move (the lock is dropped for
+ * ve_netdev_cleanup(), so the list may change under us).
+ */
+static void ve_mapped_devs_cleanup(struct ve_struct *ve)
+{
+	struct net_device *dev;
+
+	rtnl_lock();
+	write_lock_bh(&dev_base_lock);
+restart:
+	for (dev = ve->_net_dev_base; dev != NULL; dev = dev->next)
+	{
+		if ((dev->features & NETIF_F_VENET) ||
+		    (dev == ve->_loopback_dev)) /* Skip loopback dev */
+			continue;
+		/* cleanup may sleep — drop the lock around it */
+		write_unlock_bh(&dev_base_lock);
+		ve_netdev_cleanup(dev, 0);
+		write_lock_bh(&dev_base_lock);
+		__ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
+		goto restart;
+	}
+	write_unlock_bh(&dev_base_lock);
+	rtnl_unlock();
+}
+#endif
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE information via /proc
+ *
+ **********************************************************************
+ **********************************************************************/
+#ifdef CONFIG_PROC_FS
+/*
+ * Emit one /proc/vz/devperms line: "veid type mask major:minor".
+ * The magic pointer 1L (from devperms_seq_start) stands for the header.
+ */
+static int devperms_seq_show(struct seq_file *m, void *v)
+{
+	struct devperms_struct *dp;
+	char dev_s[32], type_c;
+	unsigned use, type;
+	dev_t dev;
+
+	dp = (struct devperms_struct *)v;
+	if (dp == (struct devperms_struct *)1L) {
+		seq_printf(m, "Version: 2.7\n");
+		return 0;
+	}
+
+	use = dp->type & VE_USE_MASK;
+	type = dp->type & S_IFMT;
+	dev = dp->dev;
+
+	/* (use | FLAG) == use  <=>  all bits of FLAG are set in use */
+	if ((use | VE_USE_MINOR) == use)
+		snprintf(dev_s, sizeof(dev_s), "%d:%d", MAJOR(dev), MINOR(dev));
+	else if ((use | VE_USE_MAJOR) == use)
+		snprintf(dev_s, sizeof(dev_s), "%d:*", MAJOR(dp->dev));
+	else
+		snprintf(dev_s, sizeof(dev_s), "*:*");
+
+	if (type == S_IFCHR)
+		type_c = 'c';
+	else if (type == S_IFBLK)
+		type_c = 'b';
+	else
+		type_c = '?';
+
+	seq_printf(m, "%10u %c %03o %s\n", dp->veid, type_c, dp->mask, dev_s);
+	return 0;
+}
+
+/*
+ * seq_file start: take the hash guard (released in devperms_seq_stop)
+ * and seek to position *pos.  Position 0 is the synthetic header token
+ * (void *)1L; positions 1.. walk the devperms hash in slot order.
+ * m->private remembers the slot of the returned entry for _next.
+ */
+static void *devperms_seq_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t cpos;
+	long slot;
+	struct devperms_struct *dp;
+
+	cpos = *pos;
+	read_lock(&devperms_hash_guard);
+	if (cpos-- == 0)
+		return (void *)1L;
+
+	for (slot = 0; slot < DEVPERMS_HASH_SZ; slot++)
+		for (dp = devperms_hash[slot]; dp; dp = dp->devhash_next)
+			if (cpos-- == 0) {
+				m->private = (void *)slot;
+				return dp;
+			}
+	return NULL;
+}
+
+/*
+ * seq_file next: advance within the current chain, or move on to the
+ * next non-empty hash slot (slot index cached in m->private).
+ * The header token 1L restarts the scan from slot 0.
+ */
+static void *devperms_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	long slot;
+	struct devperms_struct *dp;
+
+	dp = (struct devperms_struct *)v;
+
+	if (dp == (struct devperms_struct *)1L)
+		slot = 0;
+	else if (dp->devhash_next == NULL)
+		slot = (long)m->private + 1;
+	else {
+		(*pos)++;
+		return dp->devhash_next;
+	}
+
+	for (; slot < DEVPERMS_HASH_SZ; slot++)
+		if (devperms_hash[slot]) {
+			(*pos)++;
+			m->private = (void *)slot;
+			return devperms_hash[slot];
+		}
+	return NULL;
+}
+
+/* seq_file stop: release the lock taken in devperms_seq_start(). */
+static void devperms_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&devperms_hash_guard);
+}
+
+static struct seq_operations devperms_seq_op = {
+	.start	= devperms_seq_start,
+	.next	= devperms_seq_next,
+	.stop	= devperms_seq_stop,
+	.show	= devperms_seq_show,
+};
+
+/* open() handler for /proc/vz/devperms — plain seq_file iteration. */
+static int devperms_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &devperms_seq_op);
+}
+
+static struct file_operations proc_devperms_ops = {
+	.open		= devperms_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+#if BITS_PER_LONG == 32
+#define VESTAT_LINE_WIDTH (6 * 11 + 6 * 21)
+#define VESTAT_LINE_FMT "%10u %10lu %10lu %10lu %10Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %10lu\n"
+#define VESTAT_HEAD_FMT "%10s %10s %10s %10s %10s %20s %20s %20s %20s %20s %20s %10s\n"
+#else
+#define VESTAT_LINE_WIDTH (12 * 21)
+#define VESTAT_LINE_FMT "%20u %20lu %20lu %20lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20lu\n"
+#define VESTAT_HEAD_FMT "%20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s\n"
+#endif
+
+/*
+ * Emit one /proc/vz/vestat line with per-VE aggregated CPU statistics.
+ * The header is printed before the first list element — which for a
+ * non-super caller is its own VE.  VE0 itself is never shown.
+ */
+static int vestat_seq_show(struct seq_file *m, void *v)
+{
+	struct ve_struct *ve = (struct ve_struct *)v;
+	struct ve_struct *curve;
+	int cpu;
+	unsigned long user_ve, nice_ve, system_ve;
+	unsigned long long uptime;
+	cycles_t uptime_cycles, idle_time, strv_time, used;
+
+	curve = get_exec_env();
+	if (ve == ve_list_head ||
+	    (!ve_is_super(curve) && ve == curve)) {
+		/* print header */
+		seq_printf(m, "%-*s\n",
+			VESTAT_LINE_WIDTH - 1,
+			"Version: 2.2");
+		seq_printf(m, VESTAT_HEAD_FMT, "VEID",
+					"user", "nice", "system",
+					"uptime", "idle",
+					"strv", "uptime", "used",
+					"maxlat", "totlat", "numsched");
+	}
+
+	if (ve == get_ve0())
+		return 0;
+
+	/* strv_time is always reported as 0 here (kept for format compat). */
+	user_ve = nice_ve = system_ve = 0;
+	idle_time = strv_time = used = 0;
+
+	/* Sum the per-cpu counters over all possible CPUs. */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		user_ve += VE_CPU_STATS(ve, cpu)->user;
+		nice_ve += VE_CPU_STATS(ve, cpu)->nice;
+		system_ve += VE_CPU_STATS(ve, cpu)->system;
+		used += VE_CPU_STATS(ve, cpu)->used_time;
+		idle_time += ve_sched_get_idle_time(ve, cpu);
+	}
+	uptime_cycles = get_cycles() - ve->start_cycles;
+	uptime = get_jiffies_64() - ve->start_jiffies;
+
+	seq_printf(m, VESTAT_LINE_FMT, ve->veid,
+				user_ve, nice_ve, system_ve,
+				uptime, idle_time, 
+				strv_time, uptime_cycles, used,
+				ve->sched_lat_ve.last.maxlat,
+				ve->sched_lat_ve.last.totlat,
+				ve->sched_lat_ve.last.count);
+	return 0;
+}
+
+/*
+ * seq_file start for the VE list.  Takes ve_list_guard (released in
+ * ve_seq_stop, including the early-return paths).  A non-super caller
+ * only ever sees its own VE, at position 0.
+ */
+static void *ve_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct ve_struct *ve, *curve;
+	loff_t l;
+
+	curve = get_exec_env();
+	read_lock(&ve_list_guard);
+	if (!ve_is_super(curve)) {
+		if (*pos != 0)
+			return NULL;
+		return curve;
+	}
+	/* Super VE: skip *pos entries from the global list head. */
+	for (ve = ve_list_head, l = *pos;
+	     ve != NULL && l > 0;
+	     ve = ve->next, l--);
+	return ve;
+}
+
+/*
+ * seq_file next: non-super callers get exactly one entry (their own VE),
+ * so iteration ends immediately for them.
+ */
+static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct ve_struct *ve = (struct ve_struct *)v;
+
+	if (!ve_is_super(get_exec_env()))
+		return NULL;
+	(*pos)++;
+	return ve->next;
+}
+
+static void ve_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&ve_list_guard);
+}
+
+/* seq_file callbacks for /proc/vz/vestat.  Uses C99 designated
+ * initializers for consistency with devperms_seq_op above (the old
+ * GNU "field:" syntax is obsolete). */
+static struct seq_operations vestat_seq_op = {
+	.start	= ve_seq_start,
+	.next	= ve_seq_next,
+	.stop	= ve_seq_stop,
+	.show	= vestat_seq_show,
+};
+
+/* open() handler for /proc/vz/vestat — plain seq_file iteration. */
+static int vestat_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &vestat_seq_op);
+}
+
+/* file_operations for /proc/vz/vestat.  Converted from the obsolete
+ * GNU "field:" initializer syntax to C99 designated initializers,
+ * matching proc_devperms_ops above. */
+static struct file_operations proc_vestat_operations = {
+	.open		= vestat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+/*
+ * Pages to account as "used" for meminfo virtualization.  The global
+ * glob_ve_meminfo knob selects which beancounter resource is used.
+ */
+static inline unsigned long ve_used_mem(struct user_beancounter *ub)
+{
+	if (glob_ve_meminfo)
+		return ub->ub_parms[UB_OOMGUARPAGES].held;
+	return ub->ub_parms[UB_PRIVVMPAGES].held;
+}
+
+/*
+ * Rewrite a meminfo snapshot so the current VE sees at most its
+ * configured meminfo_val pages of RAM, with "free" derived from the
+ * beancounter usage.  A zero meminfo_val disables virtualization.
+ * Note: memset() zeroes every other meminfo field; only totalram and
+ * freeram are reconstructed.
+ */
+static inline void ve_mi_replace(struct meminfo *mi)
+{
+	struct user_beancounter *ub;
+	unsigned long meminfo_val;
+	unsigned long nodettram;
+	unsigned long usedmem;
+
+	meminfo_val = get_exec_env()->meminfo_val;
+
+	if (!meminfo_val)
+		return; /* No virtualization */
+
+	nodettram = mi->si.totalram;
+	ub = top_beancounter(current->mm->mm_ub);
+	usedmem = ve_used_mem(ub);
+
+	memset(mi, 0, sizeof(*mi));
+
+	/* Never report more RAM than the node actually has. */
+	mi->si.totalram = (meminfo_val > nodettram) ?
+			nodettram : meminfo_val;
+	mi->si.freeram = (mi->si.totalram > usedmem) ?
+			(mi->si.totalram - usedmem) : 0;
+}
+
+/*
+ * virtinfo notifier: intercept VIRTINFO_MEMINFO events and substitute
+ * the per-VE view of memory; all other events pass through unchanged.
+ */
+static int meminfo_call(struct vnotifier_block *self,
+                unsigned long event, void *arg, int old_ret)
+{
+	if (event != VIRTINFO_MEMINFO)
+		return old_ret;
+	ve_mi_replace((struct meminfo *)arg);
+	return NOTIFY_OK;
+}
+
+
+static struct vnotifier_block meminfo_notifier_block = {
+	.notifier_call = meminfo_call
+};
+
+/*
+ * Create the /proc/vz/vestat and /proc/vz/devperms entries and register
+ * the meminfo virtinfo notifier.  Failures to create proc entries are
+ * logged but non-fatal (always returns 0).
+ */
+static int __init init_vecalls_proc(void)
+{
+	struct proc_dir_entry *de;
+
+	de = create_proc_glob_entry_mod("vz/vestat",
+			S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	if (de == NULL) {
+		/* create "vz" subdirectory, if not exist */
+		(void) create_proc_glob_entry("vz",
+					      S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+		de = create_proc_glob_entry_mod("vz/vestat",
+				S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	}
+	if (de)
+		de->proc_fops = &proc_vestat_operations;
+	else
+		printk(KERN_WARNING 
+				"VZMON: can't make vestat proc entry\n");
+
+	de = create_proc_entry_mod("vz/devperms", S_IFREG | S_IRUSR, NULL,
+				THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_devperms_ops;
+	else
+		printk(KERN_WARNING
+				"VZMON: can't make devperms proc entry\n");
+
+	virtinfo_notifier_register(VITYPE_GENERAL, &meminfo_notifier_block);
+
+	return 0;
+}
+
+/* Undo init_vecalls_proc(): remove proc entries, drop the notifier. */
+static void fini_vecalls_proc(void)
+{
+	remove_proc_entry("vz/devperms", NULL);
+	remove_proc_entry("vz/vestat", NULL);
+	virtinfo_notifier_unregister(VITYPE_GENERAL, &meminfo_notifier_block);
+}
+#else
+#define init_vecalls_proc()	(0)
+#define fini_vecalls_proc()	do { } while (0)
+#endif /* CONFIG_PROC_FS */
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * User ctl
+ *
+ **********************************************************************
+ **********************************************************************/
+
+int vzcalls_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+/* ioctl dispatch record registered with the vzdev multiplexor.
+ * Converted from obsolete GNU "field:" initializers to C99 designated
+ * initializers, consistent with the other tables in this file. */
+static struct vzioctlinfo vzcalls = {
+	.type	= VZCTLTYPE,
+	.func	= vzcalls_ioctl,
+	.owner	= THIS_MODULE,
+};
+
+/*
+ * Handler for all VZCTLTYPE ioctls.  Each case copies its argument
+ * structure from user space and dispatches to the corresponding
+ * real_* implementation.  Returns -ENOTTY for unknown commands.
+ */
+int vzcalls_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	int err;
+
+	err = -ENOTTY;
+	switch(cmd) {
+	    case VZCTL_MARK_ENV_TO_DOWN: {
+		        /* Compatibility issue */
+		        err = 0;
+		}
+		break;
+	    case VZCTL_SETDEVPERMS: {
+			/* Device type was mistakenly declared as dev_t
+			 * in the old user-kernel interface.
+			 * That's wrong, dev_t is a kernel internal type.
+			 * I use `unsigned' not having anything better in mind.
+			 * 2001/08/11  SAW  */
+			struct vzctl_setdevperms s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void *)arg, sizeof(s)))
+				break;
+			err = real_setdevperms(s.veid, s.type,
+					new_decode_dev(s.dev), s.mask);
+		}
+		break;
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	    case VZCTL_VE_NETDEV: {
+			struct vzctl_ve_netdev d;
+			char *s;
+			err = -EFAULT;
+			if (copy_from_user(&d, (void *)arg, sizeof(d)))
+				break;
+			err = -ENOMEM;
+			s = kmalloc(IFNAMSIZ+1, GFP_KERNEL);
+			if (s == NULL)
+				break;
+			/* strncpy_from_user() returns -EFAULT on a bad
+			 * user pointer; the previous code ignored that
+			 * and went on with an uninitialized buffer. */
+			err = strncpy_from_user(s, d.dev_name, IFNAMSIZ);
+			if (err < 0) {
+				kfree(s);
+				break;
+			}
+			s[IFNAMSIZ] = 0;
+			err = real_ve_dev_map(d.veid, d.op, s);
+			kfree(s);
+		}
+		break;
+#endif
+	    case VZCTL_ENV_CREATE: {
+			struct vzctl_env_create s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void *)arg, sizeof(s)))
+				break;
+			err = real_env_create(s.veid, s.flags, s.class_id,
+				NULL, 0);
+		}
+		break;
+	    case VZCTL_ENV_CREATE_DATA: {
+			struct vzctl_env_create_data s;
+			env_create_param_t *data;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void *)arg, sizeof(s)))
+				break;
+			/* Validate user-supplied length before allocating. */
+			err=-EINVAL;
+			if (s.datalen < VZCTL_ENV_CREATE_DATA_MINLEN ||
+			    s.datalen > VZCTL_ENV_CREATE_DATA_MAXLEN ||
+			    s.data == 0)
+				break;
+			err = -ENOMEM;
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				break;
+			memset(data, 0, sizeof(*data));
+			err = -EFAULT;
+			if (copy_from_user(data, (void *)s.data, s.datalen))
+				goto free_data;
+			err = real_env_create(s.veid, s.flags, s.class_id,
+				data, s.datalen);
+free_data:
+			kfree(data);
+		}
+		break;
+	    case VZCTL_GET_CPU_STAT: {
+			struct vzctl_cpustatctl s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void *)arg, sizeof(s)))
+				break;
+			err = ve_get_cpu_stat(s.veid, s.cpustat);
+		}
+		break;
+	    case VZCTL_VE_MEMINFO: {
+			struct vzctl_ve_meminfo s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void *)arg, sizeof(s)))
+				break;
+			err = ve_set_meminfo(s.veid, s.val);
+		}
+		break;
+	}
+	return err;
+}
+EXPORT_SYMBOL(real_env_create);
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * Init/exit stuff
+ *
+ **********************************************************************
+ **********************************************************************/
+
+/* Publish vzmon's callback symbols to the ksym resolver so the core
+ * kernel can call into this module while it is loaded. */
+static int __init init_vecalls_symbols(void)
+{
+	KSYMRESOLVE(real_get_device_perms_ve);
+	KSYMRESOLVE(real_do_env_cleanup);
+	KSYMRESOLVE(real_do_env_free);
+	KSYMRESOLVE(real_update_load_avg_ve);
+	KSYMMODRESOLVE(vzmon);
+	return 0;
+}
+
+/* Withdraw the symbols published by init_vecalls_symbols();
+ * the module handle is unresolved first. */
+static void fini_vecalls_symbols(void)
+{
+	KSYMMODUNRESOLVE(vzmon);
+	KSYMUNRESOLVE(real_get_device_perms_ve);
+	KSYMUNRESOLVE(real_do_env_cleanup);
+	KSYMUNRESOLVE(real_do_env_free);
+	KSYMUNRESOLVE(real_update_load_avg_ve);
+}
+
+static inline __init int init_vecalls_ioctls(void)
+{
+	vzioctl_register(&vzcalls);
+	return 0;
+}
+
+static inline void fini_vecalls_ioctls(void)
+{
+	vzioctl_unregister(&vzcalls);
+}
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *table_header;
+
+static ctl_table kernel_table[] = {
+	{
+		.ctl_name	= KERN_VE_ALLOW_KTHREADS,
+		.procname	= "ve_allow_kthreads",
+		.data		= &ve_allow_kthreads,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_VE_ALLOW_INIT_SIGNALS,
+		.procname	= "ve_allow_init_signals",
+		.data		= &ve_allow_init_signals,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ 0 }
+};
+
+static ctl_table root_table[] =  {
+	{CTL_KERN, "kernel",  NULL, 0, 0555, kernel_table},
+	{ 0 }
+};
+
+/* Register the /proc/sys/kernel VE knobs (not inserted at head). */
+static int init_vecalls_sysctl(void)
+{
+	table_header = register_sysctl_table(root_table, 0);
+	return table_header ? 0 : -ENOMEM;
+}
+
+static void fini_vecalls_sysctl(void)
+{
+	unregister_sysctl_table(table_header);
+} 
+#else
+static int init_vecalls_sysctl(void) { return 0; }
+static void fini_vecalls_sysctl(void) { ; }
+#endif
+
+/*
+ * Module init: bring up sysctl, vzmond, the devperms hash, resolved
+ * symbols, proc entries and the ioctl handler, in that order, then
+ * initialize the VE hook lists.  On failure each step is unwound in
+ * reverse via the cascading error labels.
+ */
+static int __init vecalls_init(void)
+{
+	int err;
+	int i;
+
+	err = init_vecalls_sysctl();
+	if (err)
+		goto out_vzmond;
+
+	ve_list_head = get_ve0();
+	init_rwsem(&get_ve0()->op_sem);
+
+	err = init_vzmond();
+	if (err < 0)
+		goto out_sysctl;
+
+	err = init_devperms_hash();
+	if (err < 0)
+		goto out_perms;
+
+	err = init_vecalls_symbols();
+	if (err < 0)
+		goto out_sym;
+
+	err = init_vecalls_proc();
+	if (err < 0)
+		goto out_proc;
+
+	err = init_vecalls_ioctls();
+	if (err < 0)
+		goto out_ioctls;
+
+	for (i = 0; i < VE_MAX_HOOKS; i++)
+		INIT_LIST_HEAD(&ve_hooks[i]);
+
+	return 0;
+
+	/* Error unwinding: each label undoes the step *before* the one
+	 * that failed (note the labels are named after the failing step,
+	 * not the undone one). */
+out_ioctls:
+	fini_vecalls_proc();
+out_proc:
+	fini_vecalls_symbols();
+out_sym:
+	fini_devperms_hash();
+out_perms:
+	fini_vzmond();
+out_sysctl:
+	fini_vecalls_sysctl();
+out_vzmond:
+	return err;
+}
+
+/* Module exit: tear everything down in reverse order of vecalls_init(). */
+static void vecalls_exit(void)
+{
+	fini_vecalls_ioctls();
+	fini_vecalls_proc();
+	fini_vecalls_symbols();
+	fini_devperms_hash();
+	fini_vzmond();
+	fini_vecalls_sysctl();
+}
+
+EXPORT_SYMBOL(get_ve_by_id);
+EXPORT_SYMBOL(__find_ve_by_id);
+EXPORT_SYMBOL(ve_list_guard);
+EXPORT_SYMBOL(ve_list_head);
+EXPORT_SYMBOL(nr_ve);
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Control");
+MODULE_LICENSE("GPL v2");
+
+module_init(vecalls_init)
+module_exit(vecalls_exit)
diff -Nurap linux-2.6.9-100.orig/kernel/veowner.c linux-2.6.9-ve023stab054/kernel/veowner.c
--- linux-2.6.9-100.orig/kernel/veowner.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/veowner.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,309 @@
+/*
+ *  kernel/veowner.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/ve.h>
+#include <linux/ve_owner.h>
+#include <linux/ve_proto.h>
+#include <linux/ipc.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#include <net/tcp.h>
+
+/*
+ * Attach an existing task to VE0 (the host environment): mirror its
+ * real pids into the virtual pid fields, point exec/owner env at VE0
+ * and reset the per-task scheduling accounting.  Tasks with pid 0
+ * (idle threads) are not linked into the VE task list or counted.
+ */
+void prepare_ve0_process(struct task_struct *tsk)
+{
+	set_virt_pid(tsk, tsk->pid);
+	set_virt_tgid(tsk, tsk->tgid);
+	if (tsk->signal) {
+		set_virt_pgid(tsk, tsk->signal->pgrp);
+		set_virt_sid(tsk, tsk->signal->session);
+	}
+	VE_TASK_INFO(tsk)->exec_env = get_ve0();
+	VE_TASK_INFO(tsk)->owner_env = get_ve0();
+	VE_TASK_INFO(tsk)->sleep_time = 0;
+	VE_TASK_INFO(tsk)->wakeup_stamp = 0;
+	VE_TASK_INFO(tsk)->sched_time = 0;
+	seqcount_init(&VE_TASK_INFO(tsk)->wakeup_lock);
+
+	if (tsk->pid) {
+		SET_VE_LINKS(tsk);
+		atomic_inc(&get_ve0()->pcounter);
+	}
+}
+
+void prepare_ve0_loopback(void)
+{
+	get_ve0()->_loopback_dev = &loopback_dev;
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * proc entries
+ * ------------------------------------------------------------------------
+ */
+
+/*
+ * Re-parent the proc entry @name from directory @sdir to @ddir.
+ * Unlinks it from sdir's child list and pushes it onto the front of
+ * ddir's.  Silently does nothing when the entry is not found.
+ */
+static void proc_move(struct proc_dir_entry *ddir,
+		struct proc_dir_entry *sdir,
+		const char *name)
+{
+	struct proc_dir_entry **p, *q;
+	int len;
+
+	len = strlen(name);
+	/* Walk with a pointer-to-link so the unlink works for the head too. */
+	for (p = &sdir->subdir, q = *p; q != NULL; p = &q->next, q = *p)
+		if (proc_match(len, name, q))
+			break;
+	if (q == NULL)
+		return;
+	*p = q->next;
+	q->parent = ddir;
+	q->next = ddir->subdir;
+	ddir->subdir = q;
+}
+/*
+ * Move the /proc entries that should stay globally visible (rather
+ * than VE0-private) back from VE0's proc root into the shared root.
+ * Entries not present are skipped by proc_move().
+ */
+static void prepare_proc_misc(void)
+{
+	static char *table[] = {
+		"loadavg",
+		"uptime",
+		"meminfo",
+		"version",
+		"stat",
+		"filesystems",
+		"locks",
+		"swaps",
+		"mounts",
+		"cpuinfo",
+		"net",
+		"sysvipc",
+		"sys",
+		"fs",
+		"vz",
+		"user_beancounters",
+		"cmdline",
+		"vmstat",
+		"modules",
+		"kmsg",
+		"devices",
+		NULL,
+	};
+	char **p;
+
+	for (p = table; *p != NULL; p++)
+		proc_move(&proc_root, ve0.proc_root, *p);
+}
+/*
+ * Split /proc between the global namespace and VE0: the whole existing
+ * tree is first re-parented under VE0's private proc root, then the
+ * entries every environment needs are moved back (prepare_proc_misc)
+ * or recreated per-VE (net, vz, sysvipc, fs).  Always returns 0.
+ */
+int prepare_proc(void)
+{
+	struct ve_struct *envid;
+	struct proc_dir_entry *de;
+	struct proc_dir_entry *ve_root;
+
+	envid = set_exec_env(&ve0);
+	ve_root = ve0.proc_root->subdir;
+	/* move the whole tree to be visible in VE0 only */
+	ve0.proc_root->subdir = proc_root.subdir;
+	/* NOTE(review): assumes proc_root.subdir is non-empty at this
+	 * point — the loop dereferences de->next unconditionally. */
+	for (de = ve0.proc_root->subdir; de->next != NULL; de = de->next)
+		de->parent = ve0.proc_root;
+	de->parent = ve0.proc_root;
+	de->next = ve_root;
+
+	/* move back into the global scope some specific entries */
+	proc_root.subdir = NULL;
+	prepare_proc_misc();
+	proc_mkdir("net", 0);
+	proc_mkdir("net/stat", 0);
+	proc_mkdir("vz", 0);
+#ifdef CONFIG_SYSVIPC
+	proc_mkdir("sysvipc", 0);
+#endif
+	proc_root_fs = proc_mkdir("fs", 0);
+	/* XXX proc_tty_init(); */
+
+	/* XXX process inodes */
+
+	(void)set_exec_env(envid);
+
+	(void)create_proc_glob_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+	return 0;
+}
+
+static struct proc_dir_entry ve0_proc_root = {
+	.name = "/proc",
+	.namelen = 5,
+	.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+	.nlink = 2
+};
+
+void prepare_ve0_proc_root(void)
+{
+	ve0.proc_root = &ve0_proc_root;
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * Virtualized sysctl
+ * ------------------------------------------------------------------------
+ */
+
+static int semmin[4] = { 1, 1, 1, 1 };
+static int semmax[4] = { 8000, INT_MAX, 1000, IPCMNI };
+static ctl_table kern_table[] = {
+	{KERN_NODENAME, "hostname", system_utsname.nodename, 64,
+	 0644, NULL, &proc_doutsstring, &sysctl_string},
+	{KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64,
+	 0644, NULL, &proc_doutsstring, &sysctl_string},
+#ifdef CONFIG_SYSVIPC
+#define get_ve0_field(fname) &ve0._##fname
+	{KERN_SHMMAX, "shmmax", get_ve0_field(shm_ctlmax), sizeof (size_t),
+	 0644, NULL, &proc_doulongvec_minmax },
+	{KERN_SHMALL, "shmall", get_ve0_field(shm_ctlall), sizeof (size_t),
+	 0644, NULL, &proc_doulongvec_minmax },
+	{KERN_SHMMNI, "shmmni", get_ve0_field(shm_ctlmni), sizeof (int),
+	 0644, NULL, &proc_dointvec_minmax, NULL,
+	 NULL, &semmin[0], &semmax[3] },
+	{KERN_MSGMAX, "msgmax", get_ve0_field(msg_ctlmax), sizeof (int),
+	 0644, NULL, &proc_dointvec },
+	{KERN_MSGMNI, "msgmni", get_ve0_field(msg_ctlmni), sizeof (int),
+	 0644, NULL, &proc_dointvec_minmax, NULL,
+	 NULL, &semmin[0], &semmax[3] },
+	{KERN_MSGMNB, "msgmnb", get_ve0_field(msg_ctlmnb), sizeof (int),
+	 0644, NULL, &proc_dointvec },
+	{KERN_SEM, "sem", get_ve0_field(sem_ctls), 4*sizeof (int),
+	 0644, NULL, &proc_dointvec },
+#endif
+	{0}
+};
+static ctl_table root_table[] = {
+	{CTL_KERN, "kernel", NULL, 0, 0555, kern_table},
+	{0}
+};
+extern int ip_rt_src_check;
+extern int ve_area_access_check;
+int sysctl_fsync_enable = 1;
+static ctl_table ipv4_route_table[] = {
+	{
+		ctl_name:	NET_IPV4_ROUTE_SRC_CHECK,
+		procname:	"src_check",
+		data:		&ip_rt_src_check,
+		maxlen:		sizeof(int),
+		mode:		0644,
+		proc_handler:	&proc_dointvec,
+	},
+	{ 0 }
+};
+static ctl_table ipv4_table[] = {
+	{NET_IPV4_ROUTE, "route", NULL, 0, 0555, ipv4_route_table},
+	{ 0 }
+};
+static ctl_table net_table[] = {
+	{NET_IPV4,   "ipv4",      NULL, 0, 0555, ipv4_table},
+	{ 0 }
+};
+/* /proc/sys/fs knobs.  The first entry used the obsolete GNU "field:"
+ * initializer syntax while the second already used C99 designated
+ * initializers; unified to C99 throughout. */
+static ctl_table fs_table[] = {
+	{
+		.ctl_name	= 226,
+		.procname	= "ve-area-access-check",
+		.data		= &ve_area_access_check,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= 227,
+		.procname	= "fsync-enable",
+		.data		= &sysctl_fsync_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ 0 }
+};
+static ctl_table root_table2[] = {
+	{CTL_NET, "net", NULL, 0, 0555, net_table},
+	{CTL_FS, "fs", NULL, 0, 0555, fs_table},
+	{ 0 }
+};
+/*
+ * Register VE0's sysctl tables while temporarily executing in VE0's
+ * context.  root_table (kernel knobs) is inserted at the head;
+ * root_table2 (net/fs knobs) is appended.  Always returns 0.
+ */
+int prepare_sysctl(void)
+{
+	struct ve_struct *envid;
+
+	envid = set_exec_env(&ve0);
+	ve0.kern_header = register_sysctl_table(root_table, 1);
+	/* NOTE(review): root_table2's header is discarded — it can never
+	 * be unregistered.  Presumably intentional for a boot-time table. */
+	register_sysctl_table(root_table2, 0);
+	(void)set_exec_env(envid);
+	return 0;
+}
+
+void prepare_ve0_sysctl(void)
+{
+	INIT_LIST_HEAD(&ve0.sysctl_lh);
+#ifdef CONFIG_SYSCTL
+	ve0.proc_sys_root = proc_mkdir("sys", 0);
+#endif
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * XXX init_ve_system
+ * ------------------------------------------------------------------------
+ */
+
+/*
+ * One-time boot initialization of the VE0 (host) environment: attach
+ * every existing task (including per-cpu idle threads) to VE0, record
+ * the init process and its fs root, wire up the default ipv4 devconf,
+ * and run the common proc/sysctl/ipc preparation steps.
+ */
+void init_ve_system(void)
+{
+	struct task_struct *init_entry, *p, *tsk;
+	struct ve_struct *ptr;
+	unsigned long flags;
+	int i;
+
+	ptr = get_ve0();
+	(void)get_ve(ptr);
+	atomic_set(&ptr->pcounter, 1);
+
+	/* Don't forget about idle tasks */
+	write_lock_irqsave(&tasklist_lock, flags);
+	for (i = 0; i < NR_CPUS; i++) {
+		tsk = idle_task(i);
+		if (tsk == NULL)
+			continue;
+
+		prepare_ve0_process(tsk);
+	}
+	do_each_thread_all(p, tsk) {
+		prepare_ve0_process(tsk);
+	} while_each_thread_all(p, tsk);
+	write_unlock_irqrestore(&tasklist_lock, flags);
+
+	init_entry = child_reaper;
+	ptr->init_entry = init_entry;
+	/* XXX: why? */
+	cap_set_full(ptr->cap_default);
+
+	ptr->_ipv4_devconf = &ipv4_devconf;
+	ptr->_ipv4_devconf_dflt = &ipv4_devconf_dflt;
+
+	/* Snapshot init's root for per-VE chroot bookkeeping. */
+	read_lock(&init_entry->fs->lock);
+	ptr->fs_rootmnt = init_entry->fs->rootmnt;
+	ptr->fs_root = init_entry->fs->root;
+	read_unlock(&init_entry->fs->lock);
+
+	/* common prepares */
+	prepare_proc();
+	prepare_sysctl();
+	prepare_ipc();
+}
diff -Nurap linux-2.6.9-100.orig/kernel/vzdev.c linux-2.6.9-ve023stab054/kernel/vzdev.c
--- linux-2.6.9-100.orig/kernel/vzdev.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/vzdev.c	2011-06-15 19:26:19.000000000 +0400
@@ -0,0 +1,97 @@
+/*
+ *  kernel/vzdev.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/vzctl.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/vzcalluser.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+
+#define VZCTL_MAJOR 126
+#define VZCTL_NAME "vzctl"
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Interface");
+MODULE_LICENSE("GPL v2");
+
+static LIST_HEAD(ioctls);
+static spinlock_t ioctl_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Multiplex /dev/vzctl ioctls to the registered handler whose type
+ * matches _IOC_TYPE(cmd).  The handler's module refcount is pinned for
+ * the duration of the call.  Note the list lock is dropped before the
+ * handler runs (handlers may sleep) — the function returns from inside
+ * the loop, never resuming iteration unlocked.
+ */
+int vzctl_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	int err;
+	struct list_head *p;
+	struct vzioctlinfo *inf;
+
+	err = -ENOTTY;
+	spin_lock(&ioctl_lock);
+	list_for_each(p, &ioctls) {
+		inf = list_entry(p, struct vzioctlinfo, list);
+		if (inf->type != _IOC_TYPE(cmd))
+			continue;
+
+		/* Handler's module may be unloading — -EBUSY in that case. */
+		err = try_module_get(inf->owner) ? 0 : -EBUSY;
+		spin_unlock(&ioctl_lock);
+		if (!err) {
+			err = (*inf->func)(ino, file, cmd, arg);
+			module_put(inf->owner);
+		}
+		return err;
+	}
+	spin_unlock(&ioctl_lock);
+	return err;
+}
+
+void vzioctl_register(struct vzioctlinfo *inf)
+{
+	spin_lock(&ioctl_lock);
+	list_add(&inf->list, &ioctls);
+	spin_unlock(&ioctl_lock);
+}
+
+void vzioctl_unregister(struct vzioctlinfo *inf)
+{
+	spin_lock(&ioctl_lock);
+	list_del_init(&inf->list);
+	spin_unlock(&ioctl_lock);
+}
+
+EXPORT_SYMBOL(vzioctl_register);
+EXPORT_SYMBOL(vzioctl_unregister);
+
+/*
+ * Init/exit stuff.
+ */
+static struct file_operations vzctl_fops = {
+	.owner		= THIS_MODULE,
+	.ioctl		= vzctl_ioctl,
+};
+
+static void __exit vzctl_exit(void)
+{
+	unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME);
+}
+
+/* Module init: claim the fixed vzctl character-device major. */
+static int __init vzctl_init(void)
+{
+	return register_chrdev(VZCTL_MAJOR, VZCTL_NAME, &vzctl_fops);
+}
+
+module_init(vzctl_init)
+module_exit(vzctl_exit);
diff -Nurap linux-2.6.9-100.orig/kernel/vzevent.c linux-2.6.9-ve023stab054/kernel/vzevent.c
--- linux-2.6.9-100.orig/kernel/vzevent.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/vzevent.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,163 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/netlink.h>
+#include <linux/errno.h>
+#include <linux/ve_proto.h>
+#include <linux/vzevent.h>
+
+#define VZ_EVGRP_ALL	0x01
+
+static int reboot_event;
+module_param(reboot_event, int, 0644);
+MODULE_PARM_DESC(reboot_event, "Enable reboot events");
+
+/*
+ * NOTE: the original idea was to send events via kobject_uevent(),
+ * however, it turns out that it has negative consequences like
+ * start of /sbin/hotplug which tries to react on our events in inadequate manner.
+ */
+
+static struct sock *vzev_sock;
+
+/*
+ * Map a KOBJ_* VE lifecycle action to the event-name prefix broadcast
+ * over netlink; NULL for actions this module does not report.
+ */
+static char *action_to_string(int action)
+{
+	switch (action) {
+	case KOBJ_MOUNT:
+		return "ve-mount";
+	case KOBJ_UMOUNT:
+		return "ve-umount";
+	case KOBJ_START:
+		return "ve-start";
+	case KOBJ_STOP:
+		return "ve-stop";
+	case KOBJ_REBOOT:
+		return "ve-reboot";
+	}
+	return NULL;
+}
+
+/*
+ * Broadcast "<action>@<msg>" to the VZ_EVGRP_ALL netlink group.
+ * Returns -EINVAL for unknown events, -ENOMEM on skb allocation
+ * failure; the broadcast result itself is deliberately ignored
+ * (no listener is not an error).
+ */
+static int do_vzevent_send(int event, char *msg, int len)
+{
+	struct sk_buff *skb;
+	char *buf, *action;
+	int alen;
+
+	action = action_to_string(event);
+	if (!action)
+		return -EINVAL;
+
+	alen = strlen(action);
+
+	/* payload layout: action bytes, '@' separator, msg bytes */
+	skb = alloc_skb(len + 1 + alen, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	buf = skb_put(skb, len + 1 + alen);
+	memcpy(buf, action, alen);
+	buf[alen] = '@';
+	memcpy(buf + alen + 1, msg, len);
+	(void)netlink_broadcast(vzev_sock, skb, 0, VZ_EVGRP_ALL, GFP_KERNEL);
+	return 0;
+}
+
+/*
+ * Format the printf-style attribute string and broadcast the event.
+ * The send runs with the exec env switched to VE0 so the netlink
+ * message originates from the host environment regardless of caller.
+ * Returns 0 on success or a negative errno.
+ */
+int vzevent_send(int event, const char *attrs_fmt, ...)
+{
+	va_list args;
+	int len, err;
+	struct ve_struct *ve;
+	char *page;
+
+	err = -ENOMEM;
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (!page)
+		goto out;
+
+	va_start(args, attrs_fmt);
+	len = vscnprintf(page, PAGE_SIZE, attrs_fmt, args);
+	va_end(args);
+
+	ve = set_exec_env(get_ve0());
+	err = do_vzevent_send(event, page, len);
+	(void)set_exec_env(ve);
+	free_page((unsigned long)page);
+out:
+	return err;
+}
+EXPORT_SYMBOL(vzevent_send);
+
+/*
+ * VE lifecycle hook: translate VE init/fini into start/stop (or reboot)
+ * events.  A fini while the VE's VE_REBOOT flag is set — and the
+ * reboot_event module parameter enabled — is reported as a reboot.
+ */
+static int ve_event(unsigned int hnum, void *data)
+{
+	struct ve_struct *ve;
+	int event;
+
+	ve = (struct ve_struct *)data;
+	switch (hnum) {
+		case VE_HOOK_INIT:
+			vzevent_send(KOBJ_START, "%d", VEID(ve));
+			break;
+		case VE_HOOK_FINI:
+			event = KOBJ_STOP;
+			/* test_and_clear: only the first fini reports reboot */
+			if (reboot_event && test_and_clear_bit(VE_REBOOT,
+						&get_exec_env()->flags))
+				event = KOBJ_REBOOT;
+
+			vzevent_send(event, "%d", VEID(ve));
+			break;
+	}
+	return 0;
+}
+
+static struct ve_hook ve_start_event = {
+	.hook		= ve_event,
+	.hooknum	= VE_HOOK_INIT,
+	.priority	= INT_MAX,
+	.owner		= THIS_MODULE
+};
+
+static struct ve_hook ve_stop_event = {
+	.hook		= ve_event,
+	.hooknum	= VE_HOOK_FINI,
+	.priority	= INT_MAX,
+	.owner		= THIS_MODULE
+};
+
+/*
+ * Module init: create the event netlink socket and register the
+ * start/stop lifecycle hooks, unwinding in reverse on failure.
+ */
+static int __init init_vzevent(void)
+{
+	int ret;
+	ret = -ENOMEM;
+
+	vzev_sock = netlink_kernel_create(NETLINK_VZEVENT, NULL);
+	if (vzev_sock == NULL)
+		goto out;
+
+	ret = ve_hook_register(&ve_start_event);
+	if (ret)
+		goto out_h1;
+	ret = ve_hook_register(&ve_stop_event);
+	if (ret)
+		goto out_h2;
+
+	return 0;
+
+out_h2:
+	ve_hook_unregister(&ve_start_event);
+out_h1:
+	sock_release(vzev_sock->sk_socket);
+out:
+	printk(KERN_ERR"VZEVENT: failed to init module %d\n", ret);
+	return ret;
+}
+
+static void __exit exit_vzevent(void)
+{
+	ve_hook_unregister(&ve_start_event);
+	ve_hook_unregister(&ve_stop_event);
+	sock_release(vzev_sock->sk_socket);
+}
+
+module_init(init_vzevent);
+module_exit(exit_vzevent);
diff -Nurap linux-2.6.9-100.orig/kernel/vzwdog.c linux-2.6.9-ve023stab054/kernel/vzwdog.c
--- linux-2.6.9-100.orig/kernel/vzwdog.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/kernel/vzwdog.c	2011-06-15 19:26:20.000000000 +0400
@@ -0,0 +1,302 @@
+/*
+ *  kernel/vzwdog.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/kobject.h>
+#include <linux/genhd.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/suspend.h>
+#include <linux/ve.h>
+#include <linux/vzstat.h>
+#include <asm/uaccess.h>
+
+/* Staff regading kernel thread polling VE validity */
+static int sleep_timeout = 60;
+static pid_t wdog_thread_pid;
+static int   wdog_thread_continue = 1;
+static DECLARE_COMPLETION(license_thread_exited);
+
+extern void show_mem(void);
+extern struct ve_struct *ve_list_head;
+
+static struct file *intr_file;
+static char page[PAGE_SIZE];
+
+static void parse_irq_list(int len)
+{
+	int i, k, skip;
+	for (i = 0; i < len; ) {
+		k = i;
+		while (i < len && page[i] != '\n' && page[i] != ':')
+			i++;
+		skip = 0;
+		if (i < len && page[i] != '\n') {
+			i++; /* skip ':' */
+			while (i < len && (page[i] == ' ' || page[i] == '0'))
+				i++;
+			skip = (i < len && (page[i] < '0' || page[i] > '9'));
+			while (i < len && page[i] != '\n')
+				i++;
+		}
+		if (!skip)
+			printk("%.*s\n", i - k, page + k);
+		if (i < len)
+			i++; /* skip '\n' */
+	}
+}
+
+extern loff_t vfs_llseek(struct file *file, loff_t, int);
+extern ssize_t vfs_read(struct file *file, char __user *, size_t, loff_t *);
+extern struct file *filp_open(const char *filename, int flags, int mode);
+extern int filp_close(struct file *filp, fl_owner_t id);
+static void show_irq_list(void)
+{
+	mm_segment_t fs;
+	int r;
+
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	vfs_llseek(intr_file, 0, 0);
+	r = vfs_read(intr_file, page, sizeof(page), &intr_file->f_pos);
+	set_fs(fs);
+
+	if (r > 0)
+		parse_irq_list(r);
+}
+
+static void show_alloc_latency(void)
+{
+	static const char *alloc_descr[KSTAT_ALLOCSTAT_NR] = {
+		"A0",
+		"L0",
+		"H0",
+		"L1",
+		"H1"
+	};
+	int i;
+
+	printk("lat: ");
+	for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
+		struct kstat_lat_struct *p;
+		cycles_t maxlat, avg0, avg1, avg2;
+
+		p = &kstat_glob.alloc_lat[i];
+		spin_lock_irq(&kstat_glb_lock);
+		maxlat = p->last.maxlat;
+		avg0 = p->avg[0];
+		avg1 = p->avg[1];
+		avg2 = p->avg[2];
+		spin_unlock_irq(&kstat_glb_lock);
+
+		printk("%s %Lu (%Lu %Lu %Lu)",
+				alloc_descr[i],
+				maxlat,
+				avg0,
+				avg1,
+				avg2);
+	}
+	printk("\n");
+}
+
+static void show_schedule_latency(void)
+{
+	struct kstat_lat_pcpu_struct *p;
+	cycles_t maxlat, totlat, avg0, avg1, avg2;
+	unsigned long count;
+
+	p = &kstat_glob.sched_lat;
+	spin_lock_irq(&kstat_glb_lock);
+	maxlat = p->last.maxlat;
+	totlat = p->last.totlat;
+	count = p->last.count;
+	avg0 = p->avg[0];
+	avg1 = p->avg[1];
+	avg2 = p->avg[2];
+	spin_unlock_irq(&kstat_glb_lock);
+
+	printk("sched lat: %Lu/%Lu/%lu (%Lu %Lu %Lu)\n",
+			maxlat,
+			totlat,
+			count,
+			avg0,
+			avg1,
+			avg2);
+}
+
+static void show_header(void)
+{
+	struct timeval tv;
+
+	do_gettimeofday(&tv);
+	preempt_disable();
+	printk("*** VZWDOG 1.14: time %lu.%06lu uptime %Lu CPU %d ***\n",
+			tv.tv_sec, tv.tv_usec,
+			get_jiffies_64(), smp_processor_id());
+#if 0
+	printk("*** cycles_per_jiffy %lu jiffies_per_second %u ***\n",
+			cycles_per_jiffy, HZ);
+#endif
+	preempt_enable();
+}
+
+static void show_pgdatinfo(void)
+{
+	pg_data_t *pgdat;
+
+	printk("pgdat:");
+	for_each_pgdat(pgdat) {
+		printk(" %d: %lu,%lu,%lu,%p",
+			pgdat->node_id,
+			pgdat->node_start_pfn,
+			pgdat->node_present_pages,
+			pgdat->node_spanned_pages,
+			pgdat->node_mem_map);
+	}
+	printk("\n");
+}
+
+extern struct subsystem *get_block_subsys(void);
+static void show_diskio(void)
+{
+	struct gendisk *gd;
+	struct subsystem *block_subsys;
+	char buf[BDEVNAME_SIZE];
+
+	printk("disk_io: ");
+
+	block_subsys = get_block_subsys();
+	down_read(&block_subsys->rwsem);
+	list_for_each_entry(gd, &block_subsys->kset.list, kobj.entry) {
+		char *name;
+		name = disk_name(gd, 0, buf);
+		if ((strlen(name) > 4) && (strncmp(name, "loop", 4) == 0) &&
+		    isdigit(name[4]))
+			continue;
+		if ((strlen(name) > 3) && (strncmp(name, "ram", 3) == 0) &&
+		    isdigit(name[3]))
+			continue;
+		printk("(%u,%u) %s r(%u %u %u) w(%u %u %u)\n",
+			gd->major, gd->first_minor,
+			name,
+			disk_stat_read(gd, reads),
+			disk_stat_read(gd, read_sectors),
+			disk_stat_read(gd, read_merges),
+			disk_stat_read(gd, writes),
+			disk_stat_read(gd, write_sectors),
+			disk_stat_read(gd, write_merges));
+	}
+	up_read(&block_subsys->rwsem);
+
+	printk("\n");
+}
+
+static void show_nrprocs(void)
+{
+	unsigned long _nr_running, _nr_sleeping,
+			_nr_unint, _nr_zombie, _nr_dead, _nr_stopped;
+
+	_nr_running = nr_running();
+	_nr_unint = nr_uninterruptible();
+	_nr_sleeping = nr_sleeping();
+	_nr_zombie = nr_zombie;
+	_nr_dead = atomic_read(&nr_dead);
+	_nr_stopped = nr_stopped();
+
+	printk("VEnum: %d, proc R %lu, S %lu, D %lu, "
+		"Z %lu, X %lu, T %lu (tot %d)\n",
+		nr_ve,	_nr_running, _nr_sleeping, _nr_unint,
+		_nr_zombie, _nr_dead, _nr_stopped, nr_threads);
+}
+
+static void wdog_print(void)
+{
+	show_header();
+	show_irq_list();
+	show_pgdatinfo();
+	show_mem();
+	show_diskio();
+	show_schedule_latency();
+	show_alloc_latency();
+	show_nrprocs();
+}
+
+static int wdog_loop(void* data)
+{
+	struct task_struct *tsk = current;
+	DECLARE_WAIT_QUEUE_HEAD(thread_wait_queue);
+
+	/*
+	 * This thread doesn't need any user-level access,
+	 * so get rid of all our resources
+	 */
+	daemonize("wdogd");
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	sigfillset(&tsk->blocked);
+	sigdelset(&tsk->blocked, SIGHUP);
+	recalc_sigpending();
+	spin_unlock_irq(&tsk->sighand->siglock);
+
+	while (wdog_thread_continue) {
+		wdog_print();
+		interruptible_sleep_on_timeout(&thread_wait_queue,
+					       sleep_timeout*HZ);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
+		/* clear all signals */
+		if (signal_pending(tsk))
+			flush_signals(tsk);
+	}
+
+	complete_and_exit(&license_thread_exited, 0);
+}
+
+static int __init wdog_init(void)
+{
+	struct file *file;
+
+	file = filp_open("/proc/interrupts", 0, 0);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+	intr_file = file;
+
+	wdog_thread_pid = kernel_thread(wdog_loop, NULL, 0);
+	if (wdog_thread_pid < 0) {
+		filp_close(intr_file, NULL);
+		return wdog_thread_pid;
+	}
+	return 0;
+}
+
+static void __exit wdog_exit(void)
+{
+	wdog_thread_continue = 0;
+	if (wdog_thread_pid > 0) {
+		kill_proc(wdog_thread_pid, SIGHUP, 1);
+		wait_for_completion(&license_thread_exited);
+	}
+	filp_close(intr_file, NULL);
+}
+
+module_param(sleep_timeout, int, 0666);
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo WDOG");
+MODULE_LICENSE("GPL v2");
+
+module_init(wdog_init)
+module_exit(wdog_exit)
diff -Nurap linux-2.6.9-100.orig/lib/Kconfig.debug linux-2.6.9-ve023stab054/lib/Kconfig.debug
--- linux-2.6.9-100.orig/lib/Kconfig.debug	2004-10-19 01:54:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/lib/Kconfig.debug	2011-06-15 19:26:22.000000000 +0400
@@ -20,6 +20,14 @@ config MAGIC_SYSRQ
 	  keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
 	  unless you really know what this hack does.
 
+config SYSRQ_DEBUG
+	bool "Debugging via sysrq keys"
+	depends on MAGIC_SYSRQ
+	help
+	  Say Y if you want to extend functionality of magic key. It will
+	  provide you with some debugging facilities such as dumping and
+	  writing memory, resolving symbols and some other.
+
 config MAGIC_SYSRQ
 	bool "Magic SysRq key"
 	depends on DEBUG_KERNEL && (H8300 || M68KNOMMU || V850)
@@ -52,6 +60,13 @@ config DEBUG_SPINLOCK_SLEEP
 	  If you say Y here, various routines which may sleep will become very
 	  noisy if they are called with a spinlock held.
 
+config DEBUG_KOBJECT
+	bool "kobject debugging"
+	depends on DEBUG_KERNEL
+	help
+	  If you say Y here, some extra kobject debugging messages will be sent
+	  to the syslog. 
+
 config DEBUG_HIGHMEM
 	bool "Highmem debugging"
 	depends on DEBUG_KERNEL && HIGHMEM && (X86 || PPC32 || MIPS || SPARC32)
diff -Nurap linux-2.6.9-100.orig/lib/Makefile linux-2.6.9-ve023stab054/lib/Makefile
--- linux-2.6.9-100.orig/lib/Makefile	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/lib/Makefile	2011-06-15 19:26:22.000000000 +0400
@@ -2,11 +2,15 @@
 # Makefile for some libs needed in the kernel.
 #
 
-
 lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
 	 bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
 	 kobject.o kref.o idr.o div64.o parser.o int_sqrt.o \
-	 bitmap.o extable.o
+	 bitmap.o extable.o kobject_uevent.o
+
+ifeq ($(CONFIG_DEBUG_KOBJECT),y)
+CFLAGS_kobject.o += -DDEBUG
+CFLAGS_kobject_uevent.o += -DDEBUG
+endif
 
 obj-y := sort.o
 
diff -Nurap linux-2.6.9-100.orig/lib/bust_spinlocks.c linux-2.6.9-ve023stab054/lib/bust_spinlocks.c
--- linux-2.6.9-100.orig/lib/bust_spinlocks.c	2004-10-19 01:53:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/lib/bust_spinlocks.c	2011-06-15 19:26:22.000000000 +0400
@@ -13,27 +13,20 @@
 #include <linux/tty.h>
 #include <linux/wait.h>
 #include <linux/vt_kern.h>
-
+#include <linux/console.h>
 
 void bust_spinlocks(int yes)
 {
+	if (printk_no_wake)
+		return;
+
 	if (yes) {
 		oops_in_progress = 1;
 	} else {
-		int loglevel_save = console_loglevel;
 #ifdef CONFIG_VT
 		unblank_screen();
 #endif
 		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk() will give klogd
-		 * and the blanked console a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;		/* NMI oopser may have shut the console up */
-		printk(" ");
-		console_loglevel = loglevel_save;
+		wake_up_klogd();
 	}
 }
-
-
diff -Nurap linux-2.6.9-100.orig/lib/idr.c linux-2.6.9-ve023stab054/lib/idr.c
--- linux-2.6.9-100.orig/lib/idr.c	2011-06-09 19:22:48.000000000 +0400
+++ linux-2.6.9-ve023stab054/lib/idr.c	2011-06-15 19:26:18.000000000 +0400
@@ -51,15 +51,21 @@ static struct idr_layer *alloc_layer(str
 	return(p);
 }
 
+/* only called when idp->lock is held */
+static void __free_layer(struct idr *idp, struct idr_layer *p)
+{
+	p->ary[0] = idp->id_free;
+	idp->id_free = p;
+	idp->id_free_cnt++;
+}
+
 static void free_layer(struct idr *idp, struct idr_layer *p)
 {
 	/*
 	 * Depends on the return element being zeroed.
 	 */
 	spin_lock(&idp->lock);
-	p->ary[0] = idp->id_free;
-	idp->id_free = p;
-	idp->id_free_cnt++;
+	__free_layer(idp, p);
 	spin_unlock(&idp->lock);
 }
 
@@ -187,12 +193,14 @@ build_up:
 			 * The allocation failed.  If we built part of
 			 * the structure tear it down.
 			 */
+			spin_lock(&idp->lock);
 			for (new = p; p && p != idp->top; new = p) {
 				p = p->ary[0];
 				new->ary[0] = NULL;
 				new->bitmap = new->count = 0;
-				free_layer(idp, new);
+				__free_layer(idp, new);
 			}
+			spin_unlock(&idp->lock);
 			return -1;
 		}
 		new->ary[0] = p;
diff -Nurap linux-2.6.9-100.orig/lib/inflate.c linux-2.6.9-ve023stab054/lib/inflate.c
--- linux-2.6.9-100.orig/lib/inflate.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/lib/inflate.c	2011-06-15 19:26:18.000000000 +0400
@@ -322,7 +322,7 @@ DEBG("huft1 ");
   {
     *t = (struct huft *)NULL;
     *m = 0;
-    return 0;
+    return 2;
   }
 
 DEBG("huft2 ");
diff -Nurap linux-2.6.9-100.orig/lib/kobject.c linux-2.6.9-ve023stab054/lib/kobject.c
--- linux-2.6.9-100.orig/lib/kobject.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/lib/kobject.c	2011-06-15 19:26:22.000000000 +0400
@@ -10,8 +10,6 @@
  * about using the kobject interface.
  */
 
-#undef DEBUG
-
 #include <linux/kobject.h>
 #include <linux/string.h>
 #include <linux/module.h>
@@ -63,7 +61,7 @@ static inline struct kobject * to_kobj(s
 	return container_of(entry,struct kobject,entry);
 }
 
-static int get_kobj_path_length(struct kset *kset, struct kobject *kobj)
+static int get_kobj_path_length(struct kobject *kobj)
 {
 	int length = 1;
 	struct kobject * parent = kobj;
@@ -81,7 +79,7 @@ static int get_kobj_path_length(struct k
 	return length;
 }
 
-static void fill_kobj_path(struct kset *kset, struct kobject *kobj, char *path, int length)
+static void fill_kobj_path(struct kobject *kobj, char *path, int length)
 {
 	struct kobject * parent;
 
@@ -101,155 +99,33 @@ static void fill_kobj_path(struct kset *
  * kobject_get_path - generate and return the path associated with a given kobj
  * and kset pair.  The result must be freed by the caller with kfree().
  *
- * @kset:	kset in question, with which to build the path
  * @kobj:	kobject in question, with which to build the path
  * @gfp_mask:	the allocation type used to allocate the path
  */
-char * kobject_get_path(struct kset *kset, struct kobject *kobj, int gfp_mask)
+char *kobject_get_path(struct kobject *kobj, int gfp_mask)
 {
 	char *path;
 	int len;
 
-	len = get_kobj_path_length(kset, kobj);
+	len = get_kobj_path_length(kobj);
 	if (len == 0)
 		return NULL;
 	path = kmalloc(len, gfp_mask);
 	if (!path)
 		return NULL;
 	memset(path, 0x00, len);
-	fill_kobj_path(kset, kobj, path, len);
+	fill_kobj_path(kobj, path, len);
 
 	return path;
 }
 
-#ifdef CONFIG_HOTPLUG
-
-#define BUFFER_SIZE	1024	/* should be enough memory for the env */
-#define NUM_ENVP	32	/* number of env pointers */
-static unsigned long sequence_num;
-static spinlock_t sequence_lock = SPIN_LOCK_UNLOCKED;
-
-static void kset_hotplug(const char *action, struct kset *kset,
-			 struct kobject *kobj)
-{
-	char *argv [3];
-	char **envp = NULL;
-	char *buffer = NULL;
-	char *scratch;
-	int i = 0;
-	int retval;
-	char *kobj_path = NULL;
-	char *name = NULL;
-	unsigned long seq;
-
-	/* If the kset has a filter operation, call it. If it returns
-	   failure, no hotplug event is required. */
-	if (kset->hotplug_ops->filter) {
-		if (!kset->hotplug_ops->filter(kset, kobj))
-			return;
-	}
-
-	pr_debug ("%s\n", __FUNCTION__);
-
-	if (!hotplug_path[0])
-		return;
-
-	envp = kmalloc(NUM_ENVP * sizeof (char *), GFP_KERNEL);
-	if (!envp)
-		return;
-	memset (envp, 0x00, NUM_ENVP * sizeof (char *));
-
-	buffer = kmalloc(BUFFER_SIZE, GFP_KERNEL);
-	if (!buffer)
-		goto exit;
-
-	if (kset->hotplug_ops->name)
-		name = kset->hotplug_ops->name(kset, kobj);
-	if (name == NULL)
-		name = kset->kobj.name;
-
-	argv [0] = hotplug_path;
-	argv [1] = name;
-	argv [2] = NULL;
-
-	/* minimal command environment */
-	envp [i++] = "HOME=/";
-	envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-
-	scratch = buffer;
-
-	envp [i++] = scratch;
-	scratch += sprintf(scratch, "ACTION=%s", action) + 1;
-
-	spin_lock(&sequence_lock);
-	seq = sequence_num++;
-	spin_unlock(&sequence_lock);
-
-	envp [i++] = scratch;
-	scratch += sprintf(scratch, "SEQNUM=%ld", seq) + 1;
-
-	kobj_path = kobject_get_path(kset, kobj, GFP_KERNEL);
-	if (!kobj_path)
-		goto exit;
-
-	envp [i++] = scratch;
-	scratch += sprintf (scratch, "DEVPATH=%s", kobj_path) + 1;
-
-	if (kset->hotplug_ops->hotplug) {
-		/* have the kset specific function add its stuff */
-		retval = kset->hotplug_ops->hotplug (kset, kobj,
-				  &envp[i], NUM_ENVP - i, scratch,
-				  BUFFER_SIZE - (scratch - buffer));
-		if (retval) {
-			pr_debug ("%s - hotplug() returned %d\n",
-				  __FUNCTION__, retval);
-			goto exit;
-		}
-	}
-
-	pr_debug ("%s: %s %s %s %s %s %s %s\n", __FUNCTION__, argv[0], argv[1],
-		  envp[0], envp[1], envp[2], envp[3], envp[4]);
-	retval = call_usermodehelper (argv[0], argv, envp, 0);
-	if (retval)
-		pr_debug ("%s - call_usermodehelper returned %d\n",
-			  __FUNCTION__, retval);
-
-exit:
-	kfree(kobj_path);
-	kfree(buffer);
-	kfree(envp);
-	return;
-}
-
-void kobject_hotplug(const char *action, struct kobject *kobj)
-{
-	struct kobject * top_kobj = kobj;
-
-	/* If this kobj does not belong to a kset,
-	   try to find a parent that does. */
-	if (!top_kobj->kset && top_kobj->parent) {
-		do {
-			top_kobj = top_kobj->parent;
-		} while (!top_kobj->kset && top_kobj->parent);
-	}
-
-	if (top_kobj->kset && top_kobj->kset->hotplug_ops)
-		kset_hotplug(action, top_kobj->kset, kobj);
-}
-#else
-void kobject_hotplug(const char *action, struct kobject *kobj)
-{
-	return;
-}
-#endif	/* CONFIG_HOTPLUG */
-
 /**
  *	kobject_init - initialize object.
  *	@kobj:	object in question.
  */
 void kobject_init(struct kobject * kobj)
 {
- 	kref_init(&kobj->kref);
+	kref_init(&kobj->kref);
 	INIT_LIST_HEAD(&kobj->entry);
 	kobj->kset = kset_get(kobj->kset);
 }
@@ -312,7 +188,7 @@ int kobject_add(struct kobject * kobj)
 		if (parent)
 			kobject_put(parent);
 	} else {
-		kobject_hotplug("add", kobj);
+		kobject_hotplug(kobj, KOBJ_ADD);
 	}
 
 	return error;
@@ -426,7 +302,7 @@ int kobject_rename(struct kobject * kobj
 
 void kobject_del(struct kobject * kobj)
 {
-	kobject_hotplug("remove", kobj);
+	kobject_hotplug(kobj, KOBJ_REMOVE);
 	sysfs_remove_dir(kobj);
 	unlink(kobj);
 }
@@ -658,7 +534,6 @@ EXPORT_SYMBOL(kobject_put);
 EXPORT_SYMBOL(kobject_add);
 EXPORT_SYMBOL(kobject_del);
 EXPORT_SYMBOL(kobject_rename);
-EXPORT_SYMBOL(kobject_hotplug);
 
 EXPORT_SYMBOL(kset_register);
 EXPORT_SYMBOL(kset_unregister);
diff -Nurap linux-2.6.9-100.orig/lib/kobject_uevent.c linux-2.6.9-ve023stab054/lib/kobject_uevent.c
--- linux-2.6.9-100.orig/lib/kobject_uevent.c	1970-01-01 03:00:00.000000000 +0300
+++ linux-2.6.9-ve023stab054/lib/kobject_uevent.c	2011-06-15 19:26:22.000000000 +0400
@@ -0,0 +1,368 @@
+/*
+ * kernel userspace event delivery
+ *
+ * Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004 Novell, Inc.  All rights reserved.
+ * Copyright (C) 2004 IBM, Inc. All rights reserved.
+ *
+ * Licensed under the GNU GPL v2.
+ *
+ * Authors:
+ *	Robert Love		<rml@novell.com>
+ *	Kay Sievers		<kay.sievers@vrfy.org>
+ *	Arjan van de Ven	<arjanv@redhat.com>
+ *	Greg Kroah-Hartman	<greg@kroah.com>
+ */
+
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/string.h>
+#include <linux/kobject_uevent.h>
+#include <linux/kobject.h>
+#include <net/sock.h>
+
+#define BUFFER_SIZE	1024	/* buffer for the hotplug env */
+#define NUM_ENVP	32	/* number of env pointers */
+
+#if defined(CONFIG_KOBJECT_UEVENT) || defined(CONFIG_HOTPLUG)
+static char *action_to_string(enum kobject_action action)
+{
+	switch (action) {
+	case KOBJ_ADD:
+		return "add";
+	case KOBJ_REMOVE:
+		return "remove";
+	case KOBJ_CHANGE:
+		return "change";
+	case KOBJ_MOUNT:
+		return "mount";
+	case KOBJ_UMOUNT:
+		return "umount";
+	case KOBJ_START:
+		return "start";
+	case KOBJ_STOP:
+		return "stop";
+	default:
+		return NULL;
+	}
+}
+#endif
+
+#ifdef CONFIG_KOBJECT_UEVENT
+static struct sock *uevent_sock;
+
+/**
+ * send_uevent - notify userspace by sending event trough netlink socket
+ *
+ * @signal: signal name
+ * @obj: object path (kobject)
+ * @envp: possible hotplug environment to pass with the message
+ * @gfp_mask:
+ */
+static int send_uevent(const char *signal, const char *obj,
+		       char **envp, int gfp_mask)
+{
+	struct sk_buff *skb;
+	char *pos;
+	int len;
+
+	if (!uevent_sock)
+		return -EIO;
+
+	len = strlen(signal) + 1;
+	len += strlen(obj) + 1;
+
+	/* allocate buffer with the maximum possible message size */
+	skb = alloc_skb(len + BUFFER_SIZE, gfp_mask);
+	if (!skb)
+		return -ENOMEM;
+
+	pos = skb_put(skb, len);
+	sprintf(pos, "%s@%s", signal, obj);
+
+	/* copy the environment key by key to our continuous buffer */
+	if (envp) {
+		int i;
+
+		for (i = 2; envp[i]; i++) {
+			len = strlen(envp[i]) + 1;
+			pos = skb_put(skb, len);
+			strcpy(pos, envp[i]);
+		}
+	}
+
+	return netlink_broadcast(uevent_sock, skb, 0, 1, gfp_mask);
+}
+
+static int do_kobject_uevent(struct kobject *kobj, enum kobject_action action, 
+			     struct attribute *attr, int gfp_mask)
+{
+	char *path;
+	char *attrpath;
+	char *signal;
+	int len;
+	int rc = -ENOMEM;
+
+	path = kobject_get_path(kobj, gfp_mask);
+	if (!path)
+		return -ENOMEM;
+
+	signal = action_to_string(action);
+	if (!signal)
+		return -EINVAL;
+
+	if (attr) {
+		len = strlen(path);
+		len += strlen(attr->name) + 2;
+		attrpath = kmalloc(len, gfp_mask);
+		if (!attrpath)
+			goto exit;
+		sprintf(attrpath, "%s/%s", path, attr->name);
+		rc = send_uevent(signal, attrpath, NULL, gfp_mask);
+		kfree(attrpath);
+	} else {
+		rc = send_uevent(signal, path, NULL, gfp_mask);
+	}
+
+exit:
+	kfree(path);
+	return rc;
+}
+
+/**
+ * kobject_uevent - notify userspace by sending event through netlink socket
+ * 
+ * @signal: signal name
+ * @kobj: struct kobject that the event is happening to
+ * @attr: optional struct attribute the event belongs to
+ */
+int kobject_uevent(struct kobject *kobj, enum kobject_action action,
+		   struct attribute *attr)
+{
+	return do_kobject_uevent(kobj, action, attr, GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(kobject_uevent);
+
+int kobject_uevent_atomic(struct kobject *kobj, enum kobject_action action,
+			  struct attribute *attr)
+{
+	return do_kobject_uevent(kobj, action, attr, GFP_ATOMIC);
+}
+EXPORT_SYMBOL_GPL(kobject_uevent_atomic);
+
+static int __init kobject_uevent_init(void)
+{
+	uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL);
+
+	if (!uevent_sock) {
+		printk(KERN_ERR
+		       "kobject_uevent: unable to create netlink socket!\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+postcore_initcall(kobject_uevent_init);
+
+#else
+static inline int send_uevent(const char *signal, const char *obj,
+			      char **envp, int gfp_mask)
+{
+	return 0;
+}
+
+#endif /* CONFIG_KOBJECT_UEVENT */
+
+
+#ifdef CONFIG_HOTPLUG
+char hotplug_path[HOTPLUG_PATH_LEN] = "/sbin/hotplug";
+u64 hotplug_seqnum;
+static spinlock_t sequence_lock = SPIN_LOCK_UNLOCKED;
+
+/**
+ * kobject_hotplug - notify userspace by executing /sbin/hotplug
+ *
+ * @action: action that is happening (usually "ADD" or "REMOVE")
+ * @kobj: struct kobject that the action is happening to
+ */
+void kobject_hotplug(struct kobject *kobj, enum kobject_action action)
+{
+	char *argv [3];
+	char **envp = NULL;
+	char *buffer = NULL;
+	char *seq_buff;
+	char *scratch;
+	int i = 0;
+	int retval;
+	char *kobj_path = NULL;
+	char *name = NULL;
+	char *action_string;
+	u64 seq;
+	struct kobject *top_kobj = kobj;
+	struct kset *kset;
+	static struct kset_hotplug_ops null_hotplug_ops;
+	struct kset_hotplug_ops *hotplug_ops = &null_hotplug_ops;
+
+	if (!top_kobj->kset && top_kobj->parent) {
+		do {
+			top_kobj = top_kobj->parent;
+		} while (!top_kobj->kset && top_kobj->parent);
+	}
+
+	if (top_kobj->kset)
+		kset = top_kobj->kset;
+	else
+		return;
+
+	if (kset->hotplug_ops)
+		hotplug_ops = kset->hotplug_ops;
+
+	/* If the kset has a filter operation, call it.
+	   Skip the event, if the filter returns zero. */
+	if (hotplug_ops->filter) {
+		if (!hotplug_ops->filter(kset, kobj))
+			return;
+	}
+
+	pr_debug ("%s\n", __FUNCTION__);
+
+	action_string = action_to_string(action);
+	if (!action_string)
+		return;
+
+	envp = kmalloc(NUM_ENVP * sizeof (char *), GFP_KERNEL);
+	if (!envp)
+		return;
+	memset (envp, 0x00, NUM_ENVP * sizeof (char *));
+
+	buffer = kmalloc(BUFFER_SIZE, GFP_KERNEL);
+	if (!buffer)
+		goto exit;
+
+	if (hotplug_ops->name)
+		name = hotplug_ops->name(kset, kobj);
+	if (name == NULL)
+		name = kset->kobj.name;
+
+	argv [0] = hotplug_path;
+	argv [1] = name;
+	argv [2] = NULL;
+
+	/* minimal command environment */
+	envp [i++] = "HOME=/";
+	envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+
+	scratch = buffer;
+
+	envp [i++] = scratch;
+	scratch += sprintf(scratch, "ACTION=%s", action_string) + 1;
+
+	kobj_path = kobject_get_path(kobj, GFP_KERNEL);
+	if (!kobj_path)
+		goto exit;
+
+	envp [i++] = scratch;
+	scratch += sprintf (scratch, "DEVPATH=%s", kobj_path) + 1;
+
+	envp [i++] = scratch;
+	scratch += sprintf(scratch, "SUBSYSTEM=%s", name) + 1;
+
+	/* reserve space for the sequence,
+	 * put the real one in after the hotplug call */
+	envp[i++] = seq_buff = scratch;
+	scratch += strlen("SEQNUM=18446744073709551616") + 1;
+
+	if (hotplug_ops->hotplug) {
+		/* have the kset specific function add its stuff */
+		retval = hotplug_ops->hotplug (kset, kobj,
+				  &envp[i], NUM_ENVP - i, scratch,
+				  BUFFER_SIZE - (scratch - buffer));
+		if (retval) {
+			pr_debug ("%s - hotplug() returned %d\n",
+				  __FUNCTION__, retval);
+			goto exit;
+		}
+	}
+
+	spin_lock(&sequence_lock);
+	seq = ++hotplug_seqnum;
+	spin_unlock(&sequence_lock);
+	sprintf(seq_buff, "SEQNUM=%lld", (long long)seq);
+
+	pr_debug ("%s: %s %s seq=%lld %s %s %s %s %s\n",
+		  __FUNCTION__, argv[0], argv[1], (long long)seq,
+		  envp[0], envp[1], envp[2], envp[3], envp[4]);
+
+	send_uevent(action_string, kobj_path, envp, GFP_KERNEL);
+
+	if (!hotplug_path[0])
+		goto exit;
+
+	retval = call_usermodehelper (argv[0], argv, envp, 0);
+	if (retval)
+		pr_debug ("%s - call_usermodehelper returned %d\n",
+			  __FUNCTION__, retval);
+
+exit:
+	kfree(kobj_path);
+	kfree(buffer);
+	kfree(envp);
+	return;
+}
+EXPORT_SYMBOL(kobject_hotplug);
+
+/**
+ * add_hotplug_env_var - helper for creating hotplug environment variables
+ * @envp: Pointer to table of environment variables, as passed into
+ * hotplug() method.
+ * @num_envp: Number of environment variable slots available, as
+ * passed into hotplug() method.
+ * @cur_index: Pointer to current index into @envp.  It should be
+ * initialized to 0 before the first call to add_hotplug_env_var(),
+ * and will be incremented on success.
+ * @buffer: Pointer to buffer for environment variables, as passed
+ * into hotplug() method.
+ * @buffer_size: Length of @buffer, as passed into hotplug() method.
+ * @cur_len: Pointer to current length of space used in @buffer.
+ * Should be initialized to 0 before the first call to
+ * add_hotplug_env_var(), and will be incremented on success.
+ * @format: Format for creating environment variable (of the form
+ * "XXX=%x") for snprintf().
+ *
+ * Returns 0 if environment variable was added successfully or -ENOMEM
+ * if no space was available.
+ */
+int add_hotplug_env_var(char **envp, int num_envp, int *cur_index,
+			char *buffer, int buffer_size, int *cur_len,
+			const char *format, ...)
+{
+	va_list args;
+
+	/*
+	 * We check against num_envp - 1 to make sure there is at
+	 * least one slot left after we return, since the hotplug
+	 * method needs to set the last slot to NULL.
+	 */
+	if (*cur_index >= num_envp - 1)
+		return -ENOMEM;
+
+	envp[*cur_index] = buffer + *cur_len;
+
+	va_start(args, format);
+	*cur_len += vsnprintf(envp[*cur_index],
+			      max(buffer_size - *cur_len, 0),
+			      format, args) + 1;
+	va_end(args);
+
+	if (*cur_len > buffer_size)
+		return -ENOMEM;
+
+	(*cur_index)++;
+	return 0;
+}
+EXPORT_SYMBOL(add_hotplug_env_var);
+
+#endif /* CONFIG_HOTPLUG */
diff -Nurap linux-2.6.9-100.orig/mm/filemap.c linux-2.6.9-ve023stab054/mm/filemap.c
--- linux-2.6.9-100.orig/mm/filemap.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/filemap.c	2011-06-15 19:26:19.000000000 +0400
@@ -130,20 +130,6 @@ void remove_from_page_cache(struct page 
 	spin_unlock_irq(&mapping->tree_lock);
 }
 
-static inline int sync_page(struct page *page)
-{
-	struct address_space *mapping;
-
-	/*
-	 * FIXME, fercrissake.  What is this barrier here for?
-	 */
-	smp_mb();
-	mapping = page_mapping(page);
-	if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
-		return mapping->a_ops->sync_page(page);
-	return 0;
-}
-
 /**
  * filemap_fdatawrite_range - start writeback against all of a mapping's
  * dirty pages that lie within the byte offsets <start, end>
@@ -962,6 +948,8 @@ int file_read_actor(read_descriptor_t *d
 	if (size > count)
 		size = count;
 
+	left = size;
+#ifndef CONFIG_X86_UACCESS_INDIRECT
 	/*
 	 * Faults on the destination of a read are common, so do it before
 	 * taking the kmap.
@@ -971,20 +959,21 @@ int file_read_actor(read_descriptor_t *d
 		left = __copy_to_user_inatomic(desc->arg.buf,
 						kaddr + offset, size);
 		kunmap_atomic(kaddr, KM_USER0);
-		if (left == 0)
-			goto success;
 	}
+#endif
 
-	/* Do it the slow way */
-	kaddr = kmap(page);
-	left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
-	kunmap(page);
-
-	if (left) {
-		size -= left;
-		desc->error = -EFAULT;
+	if (left != 0) {
+		/* Do it the slow way */
+		kaddr = kmap(page);
+		left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
+		kunmap(page);
+
+		if (left) {
+			size -= left;
+			desc->error = -EFAULT;
+		}
 	}
-success:
+
 	desc->count = count - size;
 	desc->written += size;
 	desc->arg.buf += size;
@@ -1794,9 +1783,13 @@ filemap_copy_from_user(struct page *page
 	char *kaddr;
 	int left;
 
+#ifndef CONFIG_X86_UACCESS_INDIRECT
 	kaddr = kmap_atomic(page, KM_USER0);
 	left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
 	kunmap_atomic(kaddr, KM_USER0);
+#else
+	left = bytes;
+#endif
 
 	if (left != 0) {
 		/* Do it the slow way */
@@ -1847,10 +1840,14 @@ filemap_copy_from_user_iovec(struct page
 	char *kaddr;
 	size_t copied;
 
+#ifndef CONFIG_X86_UACCESS_INDIRECT
 	kaddr = kmap_atomic(page, KM_USER0);
 	copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
 						base, bytes);
 	kunmap_atomic(kaddr, KM_USER0);
+#else
+	copied = 0;
+#endif
 	if (copied != bytes) {
 		kaddr = kmap(page);
 		copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
@@ -1866,7 +1863,7 @@ filemap_set_next_iovec(const struct iove
 	const struct iovec *iov = *iovp;
 	size_t base = *basep;
 
-	while (bytes) {
+	do {
 		int copy = min(bytes, iov->iov_len - base);
 
 		bytes -= copy;
@@ -1875,7 +1872,7 @@ filemap_set_next_iovec(const struct iove
 			iov++;
 			base = 0;
 		}
-	}
+	} while (bytes);
 	*iovp = iov;
 	*basep = base;
 }
@@ -2044,8 +2041,16 @@ generic_file_buffered_write(struct kiocb
 		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
 		index = pos >> PAGE_CACHE_SHIFT;
 		bytes = PAGE_CACHE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
+
+		/* Limit the size of the copy to the caller's write size */
+		bytes = min(bytes, count);
+
+		/*
+		 * Limit the size of the copy to that of the current segment,
+		 * because fault_in_pages_readable() doesn't know how to walk
+		 * segments.
+		 */
+		bytes = min(bytes, cur_iov->iov_len - iov_base);
 
 		/*
 		 * Bring in the user page that we will copy from _first_.
@@ -2063,7 +2068,17 @@ generic_file_buffered_write(struct kiocb
 			break;
 		}
 
+		if (unlikely(bytes == 0)) {
+			status = 0;
+			copied = 0;
+			goto zero_length_segment;
+		}
+
 		status = a_ops->prepare_write(file, page, offset, offset+bytes);
+		if (status > 0) {
+			bytes = status;
+			status = 0;
+		}
 		if (unlikely(status)) {
 			loff_t isize = i_size_read(inode);
 			/*
@@ -2084,7 +2099,8 @@ generic_file_buffered_write(struct kiocb
 						cur_iov, iov_base, bytes);
 		flush_dcache_page(page);
 		status = a_ops->commit_write(file, page, offset, offset+bytes);
-		if (likely(copied > 0)) {
+zero_length_segment:
+		if (likely(copied >= 0)) {
 			if (!status)
 				status = copied;
 
@@ -2109,8 +2125,12 @@ generic_file_buffered_write(struct kiocb
 		unlock_page(page);
 		mark_page_accessed(page);
 		page_cache_release(page);
-		if (status < 0)
+		if (status < 0) {
+			loff_t isize = i_size_read(inode);
+			if (pos + bytes > isize)
+				vmtruncate(inode, isize);
 			break;
+		}
 		balance_dirty_pages_ratelimited(mapping);
 		cond_resched();
 	} while (count);
diff -Nurap linux-2.6.9-100.orig/mm/fremap.c linux-2.6.9-ve023stab054/mm/fremap.c
--- linux-2.6.9-100.orig/mm/fremap.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/fremap.c	2011-06-15 19:26:22.000000000 +0400
@@ -19,6 +19,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long addr, pte_t *ptep)
 {
@@ -36,9 +38,11 @@ static inline void zap_pte(struct mm_str
 			if (!PageReserved(page)) {
 				if (pte_dirty(pte))
 					set_page_dirty(page);
-				page_remove_rmap(page);
+				page_remove_rmap(page, vma);
+				pb_remove_ref(page, mm_ub(mm));
 				page_cache_release(page);
 				mm->rss--;
+				ub_unused_privvm_inc(mm_ub(mm), 1, vma);
 			}
 		}
 	} else {
@@ -62,7 +66,10 @@ int install_page(struct mm_struct *mm, s
 	pgd_t *pgd;
 	pmd_t *pmd;
 	pte_t pte_val;
+	struct page_beancounter *pbc;
 
+	if (pb_alloc(&pbc))
+		goto err_pb;
 	pgd = pgd_offset(mm, addr);
 	spin_lock(&mm->page_table_lock);
 
@@ -79,14 +86,20 @@ int install_page(struct mm_struct *mm, s
 	 * caller about it.
 	 */
 	err = -EINVAL;
-	inode = vma->vm_file->f_mapping->host;
-	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (!page->mapping || page->index >= size)
-		goto err_unlock;
+	if (vma->vm_file) {
+		inode = vma->vm_file->f_mapping->host;
+		size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+		if (!page->mapping || page->index >= size) {
+			pte_unmap(pte);
+			goto err_unlock;
+		}
+	}
 
 	zap_pte(mm, vma, addr, pte);
 
 	mm->rss++;
+	pb_add_ref(page, mm_ub(mm), &pbc);
+	ub_unused_privvm_dec(mm_ub(mm), 1, vma);
 	flush_icache_page(vma, page);
 	set_pte(pte, mk_pte(page, prot));
 	page_add_file_rmap(page);
@@ -97,6 +110,8 @@ int install_page(struct mm_struct *mm, s
 	err = 0;
 err_unlock:
 	spin_unlock(&mm->page_table_lock);
+	pb_free(&pbc);
+err_pb:
 	return err;
 }
 EXPORT_SYMBOL(install_page);
@@ -245,4 +260,4 @@ asmlinkage long sys_remap_file_pages(uns
 
 	return err;
 }
-
+EXPORT_SYMBOL(sys_remap_file_pages);
diff -Nurap linux-2.6.9-100.orig/mm/memory.c linux-2.6.9-ve023stab054/mm/memory.c
--- linux-2.6.9-100.orig/mm/memory.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/memory.c	2011-06-15 19:26:22.000000000 +0400
@@ -40,6 +40,7 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/mman.h>
+#include <linux/virtinfo.h>
 #include <linux/swap.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
@@ -56,6 +57,9 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+
 #ifndef CONFIG_DISCONTIGMEM
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
@@ -93,12 +97,14 @@ void pgd_clear_bad(pgd_t *pgd)
         pgd_ERROR(*pgd);
         pgd_clear(pgd);
 }
+EXPORT_SYMBOL(pgd_clear_bad);
 
 void pmd_clear_bad(pmd_t *pmd)
 {
         pmd_ERROR(*pmd);
         pmd_clear(pmd);
 }
+EXPORT_SYMBOL(pmd_clear_bad);
 
 /*
  * We special-case the C-O-W ZERO_PAGE, because it's such
@@ -202,6 +208,7 @@ pte_t fastcall * pte_alloc_map(struct mm
 out:
 	return pte_offset_map(pmd, address);
 }
+EXPORT_SYMBOL(pte_alloc_map);
 
 pte_t fastcall * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 {
@@ -229,6 +236,8 @@ out:
 }
 #define PTE_TABLE_MASK	((PTRS_PER_PTE-1) * sizeof(pte_t))
 #define PMD_TABLE_MASK	((PTRS_PER_PMD-1) * sizeof(pmd_t))
+#define pb_list_size(addr)	\
+		(PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
 
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
@@ -241,13 +250,15 @@ out:
  * dst->page_table_lock is held on entry and exit,
  * but may be dropped within pmd_alloc() and pte_alloc_map().
  */
-int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
-			struct vm_area_struct *vma)
+int __copy_page_range(struct vm_area_struct *vma, struct mm_struct *src,
+		      unsigned long address, size_t size)
 {
+	struct mm_struct *dst = vma->vm_mm;
 	pgd_t * src_pgd, * dst_pgd;
-	unsigned long address = vma->vm_start;
-	unsigned long end = vma->vm_end;
+	unsigned long end = address + size;
 	unsigned long cow;
+	struct page_beancounter *pbc;
+	int need_pbc;
 
 	/*
 	 * Don't copy ptes where a page fault will fill them correctly.
@@ -266,6 +277,8 @@ int copy_page_range(struct mm_struct *ds
 	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 	src_pgd = pgd_offset(src, address)-1;
 	dst_pgd = pgd_offset(dst, address)-1;
+	pbc = NULL;
+	need_pbc = (mm_ub(dst) != mm_ub(src));
 
 	for (;;) {
 		pmd_t * src_pmd, * dst_pmd;
@@ -307,6 +320,10 @@ skip_copy_pte_range:
 				goto cont_copy_pmd_range;
 			}
 
+			if (need_pbc &&
+			    pb_alloc_list(&pbc, pb_list_size(address), dst))
+				goto nomem;
+
 			dst_pte = pte_alloc_map(dst, dst_pmd, address);
 			if (!dst_pte)
 				goto nomem;
@@ -363,6 +380,8 @@ skip_copy_pte_range:
 				dst->rss++;
 				if (PageAnon(page))
 					dst->anon_rss++;
+				ub_unused_privvm_dec(mm_ub(dst), 1, vma);
+				pb_add_list_ref(page, mm_ub(src), mm_ub(dst), &pbc);
 				set_pte(dst_pte, pte);
 				page_dup_rmap(page);
 cont_copy_pte_range_noset:
@@ -387,14 +406,26 @@ cont_copy_pmd_range:
 out_unlock:
 	spin_unlock(&src->page_table_lock);
 out:
+	pb_free_list(&pbc);
 	return 0;
 nomem:
+	pb_free_list(&pbc);
 	return -ENOMEM;
 }
+EXPORT_SYMBOL(__copy_page_range);
+
+int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
+			struct vm_area_struct *vma)
+{
+	if (vma->vm_mm != dst)
+		BUG();
+	return __copy_page_range(vma, src, vma->vm_start, vma->vm_end-vma->vm_start);
+}
 
 static void zap_pte_range(struct mmu_gather *tlb,
 		pmd_t *pmd, unsigned long address,
-		unsigned long size, struct zap_details *details)
+		unsigned long size, struct zap_details *details,
+		struct vm_area_struct *vma)
 {
 	unsigned long offset;
 	pte_t *ptep;
@@ -458,7 +489,8 @@ static void zap_pte_range(struct mmu_gat
 			else if (pte_young(pte))
 				mark_page_accessed(page);
 			tlb->freed++;
-			page_remove_rmap(page);
+			page_remove_rmap(page, vma);
+			pb_remove_ref(page, mm_ub(tlb->mm));
 			tlb_remove_page(tlb, page);
 			continue;
 		}
@@ -477,7 +509,8 @@ static void zap_pte_range(struct mmu_gat
 
 static void zap_pmd_range(struct mmu_gather *tlb,
 		pgd_t * dir, unsigned long address,
-		unsigned long size, struct zap_details *details)
+		unsigned long size, struct zap_details *details,
+		struct vm_area_struct *vma)
 {
 	pmd_t * pmd;
 	unsigned long end, pgd_boundary;
@@ -495,7 +528,7 @@ static void zap_pmd_range(struct mmu_gat
 	if (pgd_boundary && (end > pgd_boundary))
 		end = pgd_boundary;
 	do {
-		zap_pte_range(tlb, pmd, address, end - address, details);
+		zap_pte_range(tlb, pmd, address, end - address, details, vma);
 		address = (address + PMD_SIZE) & PMD_MASK; 
 		pmd++;
 	} while (address && (address < end));
@@ -505,16 +538,21 @@ static void unmap_page_range(struct mmu_
 		struct vm_area_struct *vma, unsigned long address,
 		unsigned long end, struct zap_details *details)
 {
+	unsigned long freed;
 	pgd_t * dir;
 
 	BUG_ON(address >= end);
 	dir = pgd_offset(vma->vm_mm, address);
 	tlb_start_vma(tlb, vma);
+	freed = tlb->freed;
 	do {
-		zap_pmd_range(tlb, dir, address, end - address, details);
+		zap_pmd_range(tlb, dir, address, end - address, details, vma);
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
+	freed = tlb->freed - freed;
+	if (freed)
+		ub_unused_privvm_inc(mm_ub(tlb->mm), freed, vma);
 	tlb_end_vma(tlb, vma);
 }
 
@@ -642,6 +680,7 @@ void zap_page_range(struct vm_area_struc
 	unsigned long nr_accounted = 0;
 
 	if (is_vm_hugetlb_page(vma)) {
+		/* ub acct is performed in unmap_hugepage_range */
 		zap_hugepage_range(vma, address, size);
 		return;
 	}
@@ -659,23 +698,15 @@ EXPORT_SYMBOL(zap_page_range);
  * Do a quick page-table lookup for a single page.
  * mm->page_table_lock must be held.
  */
-struct page *
-follow_page(struct mm_struct *mm, unsigned long address, int write) 
+static struct page *
+pgd_follow_page(struct mm_struct *mm, pgd_t *pgd, unsigned long address,
+		int write)
 {
-	pgd_t *pgd;
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 	unsigned long pfn;
 	struct page *page;
 
-	page = follow_huge_addr(mm, address, write);
-	if (! IS_ERR(page))
-		return page;
-
-	pgd = pgd_offset(mm, address);
-	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
-		goto out;
-
 	pmd = pmd_offset(pgd, address);
 	if (pmd_none(*pmd))
 		goto out;
@@ -708,6 +739,40 @@ out:
 }
 
 struct page *
+follow_page(struct mm_struct *mm, unsigned long address, int write)
+{
+	pgd_t *pgd;
+	struct page *page;
+
+	page = follow_huge_addr(mm, address, write);
+	if (! IS_ERR(page))
+		return page;
+
+	pgd = pgd_offset(mm, address);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		return NULL;
+
+	return pgd_follow_page(mm, pgd, address, write);
+}
+
+struct page *
+follow_page_k(unsigned long address, int write)
+{
+	pgd_t *pgd;
+	struct page *page;
+
+	page = follow_huge_addr(&init_mm, address, write);
+	if (! IS_ERR(page))
+		return page;
+
+	pgd = pgd_offset_k(address);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		return NULL;
+
+	return pgd_follow_page(&init_mm, pgd, address, write);
+}
+
+struct page *
 follow_page_pte(struct mm_struct *mm, unsigned long address, int write,
 		pte_t *page_pte)
 {
@@ -1184,6 +1249,7 @@ static int do_wp_page(struct mm_struct *
 	unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte)
 {
 	struct page *old_page, *new_page;
+	struct page_beancounter *pbc;
 	unsigned long pfn = pte_pfn(pte);
 	pte_t entry;
 
@@ -1208,6 +1274,7 @@ static int do_wp_page(struct mm_struct *
 			flush_cache_page(vma, address);
 			entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
 					      vma);
+			clear_bit(PG_checkpointed, &old_page->flags);
 			ptep_set_access_flags(vma, address, page_table, entry, 1);
 			update_mmu_cache(vma, address, entry);
 			lazy_mmu_prot_update(entry);
@@ -1225,6 +1292,9 @@ static int do_wp_page(struct mm_struct *
 		page_cache_get(old_page);
 	spin_unlock(&mm->page_table_lock);
 
+	if (pb_alloc(&pbc))
+		goto out;
+
 	if (unlikely(anon_vma_prepare(vma)))
 		goto no_new_page;
 	new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
@@ -1240,10 +1310,15 @@ static int do_wp_page(struct mm_struct *
 	if (likely(pte_same(*page_table, pte))) {
 		if (PageAnon(old_page))
 			mm->anon_rss--;
-		if (PageReserved(old_page))
+		if (PageReserved(old_page)) {
 			++mm->rss;
-		else
-			page_remove_rmap(old_page);
+			ub_unused_privvm_dec(mm_ub(mm), 1, vma);
+		} else {
+			page_remove_rmap(old_page, vma);
+			pb_remove_ref(old_page, mm_ub(mm));
+		}
+
+		pb_add_ref(new_page, mm_ub(mm), &pbc);
 		break_cow(vma, new_page, address, page_table);
 		lru_cache_add_active(new_page);
 		page_add_anon_rmap(new_page, vma, address);
@@ -1258,6 +1333,8 @@ static int do_wp_page(struct mm_struct *
 	return VM_FAULT_MINOR;
 
 no_new_page:
+	pb_free(&pbc);
+out:
 	page_cache_release(old_page);
 	return VM_FAULT_OOM;
 }
@@ -1568,12 +1645,21 @@ static int do_swap_page(struct mm_struct
 	pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access)
 {
 	struct page *page;
+	struct page_beancounter *pbc;
 	swp_entry_t entry = pte_to_swp_entry(orig_pte);
 	pte_t pte;
-	int ret = VM_FAULT_MINOR;
+	int ret;
+	cycles_t start;
 
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
+	start = get_cycles();
+	pbc = NULL;
+	ret = VM_FAULT_OOM;
+	if (pb_alloc(&pbc))
+		goto out_nopbc;
+
+	ret = VM_FAULT_MINOR;
 	page = lookup_swap_cache(entry);
 	if (!page) {
  		swapin_readahead(entry, address, vma);
@@ -1622,10 +1708,12 @@ static int do_swap_page(struct mm_struct
 	/* The page isn't present yet, go ahead with the fault. */
 		
 	swap_free(entry);
-	if (vm_swap_full())
-		remove_exclusive_swap_page(page);
+	try_to_remove_exclusive_swap_page(page);
 
 	mm->rss++;
+	mm_ub(mm)->ub_perfstat[smp_processor_id()].swapin++;
+	ub_unused_privvm_dec(mm_ub(mm), 1, vma);
+	pb_add_ref(page, mm_ub(mm), &pbc);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if (write_access && can_share_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1650,7 +1738,13 @@ static int do_swap_page(struct mm_struct
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
 out:
+	pb_free(&pbc);
+	spin_lock_irq(&kstat_glb_lock);
+	KSTAT_LAT_ADD(&kstat_glob.swap_in, get_cycles() - start);
+	spin_unlock_irq(&kstat_glb_lock);
+out_nopbc:
 	return ret;
+
 out_nomap:
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
@@ -1708,6 +1802,7 @@ do_anonymous_page(struct mm_struct *mm, 
 {
 	pte_t entry;
 	struct page * page;
+	struct page_beancounter *pbc;
 
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
@@ -1717,8 +1812,11 @@ do_anonymous_page(struct mm_struct *mm, 
 		return VM_FAULT_SIGBUS;
 
 	/* Use the zero-page for reads */
+	pbc = NULL;
 	if (write_access) {
 		/* Allocate our own private page. */
+		if (pb_alloc(&pbc))
+			goto no_mem;
 		if (unlikely(anon_vma_prepare(vma)))
 			goto no_mem;
 		page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
@@ -1736,6 +1834,8 @@ do_anonymous_page(struct mm_struct *mm, 
 			goto out;
 		}
 		mm->rss++;
+		ub_unused_privvm_dec(mm_ub(mm), 1, vma);
+		pb_add_ref(page, mm_ub(mm), &pbc);
 		entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
 							 vma->vm_page_prot)),
 				      vma);
@@ -1767,8 +1867,10 @@ do_anonymous_page(struct mm_struct *mm, 
 	lazy_mmu_prot_update(entry);
 	spin_unlock(&mm->page_table_lock);
 out:
+	pb_free(&pbc);
 	return VM_FAULT_MINOR;
 no_mem:
+	pb_free(&pbc);
 	return VM_FAULT_OOM;
 }
 
@@ -1789,6 +1891,7 @@ do_no_page(struct mm_struct *mm, struct 
 	unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
 {
 	struct page * new_page;
+	struct page_beancounter *pbc;
 	struct address_space *mapping = NULL;
 	pte_t entry;
 	int sequence = 0;
@@ -1801,6 +1904,9 @@ do_no_page(struct mm_struct *mm, struct 
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
 
+	if (pb_alloc(&pbc))
+		return VM_FAULT_OOM;
+
 	if (vma->vm_file) {
 		mapping = vma->vm_file->f_mapping;
 		sequence = atomic_read(&mapping->truncate_count);
@@ -1810,10 +1916,14 @@ retry:
 	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
 
 	/* no page was available -- either SIGBUS or OOM */
-	if (new_page == NOPAGE_SIGBUS)
+	if (new_page == NOPAGE_SIGBUS) {
+		pb_free(&pbc);
 		return VM_FAULT_SIGBUS;
-	if (new_page == NOPAGE_OOM)
+	}
+	if (new_page == NOPAGE_OOM) {
+		pb_free(&pbc);
 		return VM_FAULT_OOM;
+	}
 
 	/*
 	 * Should we do an early C-O-W break?
@@ -1862,8 +1972,11 @@ retry:
 	 */
 	/* Only go through if we didn't race with anybody else... */
 	if (pte_none(*page_table)) {
-		if (!PageReserved(new_page))
+		if (!PageReserved(new_page)) {
 			++mm->rss;
+			ub_unused_privvm_dec(mm_ub(mm), 1, vma);
+			pb_add_ref(new_page, mm_ub(mm), &pbc);
+		}
 		flush_icache_page(vma, new_page);
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
@@ -1890,6 +2003,7 @@ retry:
 	update_mmu_cache(vma, address, entry);
 	spin_unlock(&mm->page_table_lock);
 out:
+	pb_free(&pbc);
 	return ret;
 oom:
 	page_cache_release(new_page);
@@ -1997,6 +2111,28 @@ int handle_mm_fault(struct mm_struct *mm
 	pgd_t *pgd;
 	pmd_t *pmd;
 
+#ifdef CONFIG_VZ_GENCALLS
+	do {
+		int ret;
+#ifdef CONFIG_USER_RESOURCE 
+		struct task_beancounter *tbc;
+
+		tbc = task_bc(current);
+		if (!test_bit(UB_AFLAG_NOTIF_PAGEIN, &mm_ub(mm)->ub_aflags) &&
+		    tbc->pgfault_allot) {
+			tbc->pgfault_allot--;
+			break; /* skip notifier */
+		}
+#endif
+		ret = virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_PAGEIN,
+					(void *)1);
+		if (ret & NOTIFY_FAIL)
+			return VM_FAULT_SIGBUS;
+		if (ret & NOTIFY_OK)
+			return VM_FAULT_MINOR; /* retry */
+	} while (0);
+#endif
+
 	__set_current_state(TASK_RUNNING);
 	pgd = pgd_offset(mm, address);
 
@@ -2020,6 +2156,7 @@ int handle_mm_fault(struct mm_struct *mm
 	spin_unlock(&mm->page_table_lock);
 	return VM_FAULT_OOM;
 }
+EXPORT_SYMBOL(handle_mm_fault);
 
 /*
  * Allocate page middle directory.
@@ -2052,6 +2189,7 @@ pmd_t fastcall *__pmd_alloc(struct mm_st
 out:
 	return pmd_offset(pgd, address);
 }
+EXPORT_SYMBOL(__pmd_alloc);
 
 int make_pages_present(unsigned long addr, unsigned long end)
 {
@@ -2073,6 +2211,7 @@ int make_pages_present(unsigned long add
 		return ret;
 	return ret == len ? 0 : -1;
 }
+EXPORT_SYMBOL(make_pages_present);
 
 /* 
  * Map a vmalloc()-space virtual address to the physical page.
diff -Nurap linux-2.6.9-100.orig/mm/mempolicy.c linux-2.6.9-ve023stab054/mm/mempolicy.c
--- linux-2.6.9-100.orig/mm/mempolicy.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/mempolicy.c	2011-06-15 19:26:18.000000000 +0400
@@ -136,6 +136,8 @@ static int get_nodes(unsigned long *node
 	bitmap_zero(nodes, MAX_NUMNODES);
 	if (maxnode == 0 || !nmask)
 		return 0;
+	if (maxnode > PAGE_SIZE*8 /*BITS_PER_BYTE*/)
+		return -EINVAL;
 
 	nlongs = BITS_TO_LONGS(maxnode);
 	if (nlongs == 0)
diff -Nurap linux-2.6.9-100.orig/mm/mempool.c linux-2.6.9-ve023stab054/mm/mempool.c
--- linux-2.6.9-100.orig/mm/mempool.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/mempool.c	2011-06-15 19:26:19.000000000 +0400
@@ -10,6 +10,7 @@
 
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/kmem_cache.h>
 #include <linux/module.h>
 #include <linux/mempool.h>
 #include <linux/blkdev.h>
@@ -72,6 +73,9 @@ mempool_t * mempool_create(int min_nr, m
 	pool->alloc = alloc_fn;
 	pool->free = free_fn;
 
+	if (alloc_fn == mempool_alloc_slab)
+		kmem_mark_nocharge((kmem_cache_t *)pool_data);
+
 	/*
 	 * First pre-allocate the guaranteed number of buffers.
 	 */
@@ -112,6 +116,7 @@ int mempool_resize(mempool_t *pool, int 
 	unsigned long flags;
 
 	BUG_ON(new_min_nr <= 0);
+	gfp_mask &= ~__GFP_UBC;
 
 	spin_lock_irqsave(&pool->lock, flags);
 	if (new_min_nr < pool->min_nr) {
@@ -194,6 +199,9 @@ void * mempool_alloc(mempool_t *pool, in
 	DEFINE_WAIT(wait);
 	int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
 
+	gfp_mask &= ~__GFP_UBC;
+	gfp_nowait &= ~__GFP_UBC;
+
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 repeat_alloc:
 	element = pool->alloc(gfp_nowait|__GFP_NOWARN, pool->pool_data);
diff -Nurap linux-2.6.9-100.orig/mm/mlock.c linux-2.6.9-ve023stab054/mm/mlock.c
--- linux-2.6.9-100.orig/mm/mlock.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/mlock.c	2011-06-15 19:26:21.000000000 +0400
@@ -8,6 +8,9 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 
+#include <ub/ub_vmpages.h>
+#include <linux/module.h>
+
 
 static int mlock_fixup(struct vm_area_struct * vma, 
 	unsigned long start, unsigned long end, unsigned int newflags)
@@ -19,16 +22,22 @@ static int mlock_fixup(struct vm_area_st
 	if (newflags == vma->vm_flags)
 		goto out;
 
+	if (newflags & VM_LOCKED) {
+		ret = ub_locked_mem_charge(mm_ub(mm), end - start);
+		if (ret < 0)
+			goto out;
+	}
+
 	if (start != vma->vm_start) {
 		ret = split_vma(mm, vma, start, 1);
 		if (ret)
-			goto out;
+			goto out_uncharge;
 	}
 
 	if (end != vma->vm_end) {
 		ret = split_vma(mm, vma, end, 0);
 		if (ret)
-			goto out;
+			goto out_uncharge;
 	}
 
 	/*
@@ -46,9 +55,17 @@ static int mlock_fixup(struct vm_area_st
 		pages = -pages;
 		if (!(newflags & VM_IO))
 			ret = make_pages_present(start, end);
+	} else {
+		/* uncharge this memory, since it was unlocked */
+		ub_locked_mem_uncharge(mm_ub(mm), end - start);
 	}
 
 	vma->vm_mm->locked_vm -= pages;
+	return ret;
+
+out_uncharge:
+	if (newflags & VM_LOCKED)
+		ub_locked_mem_uncharge(mm_ub(mm), end - start);
 out:
 	if (ret == -ENOMEM)
 		ret = -EAGAIN;
@@ -125,6 +142,7 @@ asmlinkage long sys_mlock(unsigned long 
 	up_write(&current->mm->mmap_sem);
 	return error;
 }
+EXPORT_SYMBOL(sys_mlock);
 
 asmlinkage long sys_munlock(unsigned long start, size_t len)
 {
@@ -137,6 +155,7 @@ asmlinkage long sys_munlock(unsigned lon
 	up_write(&current->mm->mmap_sem);
 	return ret;
 }
+EXPORT_SYMBOL(sys_munlock);
 
 static int do_mlockall(int flags)
 {
diff -Nurap linux-2.6.9-100.orig/mm/mmap.c linux-2.6.9-ve023stab054/mm/mmap.c
--- linux-2.6.9-100.orig/mm/mmap.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/mmap.c	2011-06-15 19:26:21.000000000 +0400
@@ -33,6 +33,8 @@
 #define arch_mmap_check(addr, len, flags)	(0)
 #endif
 
+#include <ub/ub_vmpages.h>
+
 /*
  * WARNING: the debugging will use recursive algorithms so never enable this
  * unless you know what you are doing.
@@ -101,6 +103,8 @@ static void remove_vm_struct(struct vm_a
 	struct file *file = vma->vm_file;
 
 	might_sleep();
+	ub_memory_uncharge(mm_ub(vma->vm_mm), vma->vm_end - vma->vm_start,
+			vma->vm_flags, vma->vm_file);
 	if (file) {
 		struct address_space *mapping = file->f_mapping;
 		spin_lock(&mapping->i_mmap_lock);
@@ -116,6 +120,7 @@ static void remove_vm_struct(struct vm_a
 	kmem_cache_free(vm_area_cachep, vma);
 }
 
+static unsigned long __do_brk(unsigned long, unsigned long, int);
 /*
  *  sys_brk() for the most part doesn't need the global kernel
  *  lock, except when an application is doing something nasty
@@ -155,7 +160,7 @@ asmlinkage unsigned long sys_brk(unsigne
 		goto out;
 
 	/* Ok, looks good - let it rip. */
-	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
+	if (__do_brk(oldbrk, newbrk-oldbrk, UB_HARD) != oldbrk)
 		goto out;
 set_brk:
 	mm->brk = brk;
@@ -833,6 +838,12 @@ unsigned long do_mmap_pgoff(struct file 
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
+	if (file && (prot & PROT_EXEC)) {
+		error = check_area_execute_ve(file->f_dentry, file->f_vfsmnt);
+		if (error)
+			return error;
+	}
+
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
@@ -947,6 +958,11 @@ munmap_back:
 		}
 	}
 
+	error = -ENOMEM;
+	if (ub_memory_charge(mm_ub(mm), len, vm_flags, file,
+				(flags & MAP_EXECPRIO ? UB_SOFT : UB_HARD)))
+		goto uncharge_error;
+
 	/*
 	 * Can we just expand an old private anonymous mapping?
 	 * The VM_SHARED test is necessary because shmem_zero_setup
@@ -962,7 +978,8 @@ munmap_back:
 	 * specific mapper. the address has already been validated, but
 	 * not unmapped, but the maps are removed from the list.
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL |
+			(flags & MAP_EXECPRIO ? __GFP_SOFT_UBC : 0));
 	if (!vma) {
 		error = -ENOMEM;
 		goto unacct_error;
@@ -1063,6 +1080,8 @@ unmap_and_free_vma:
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
+	ub_memory_uncharge(mm_ub(mm), len, vm_flags, file);
+uncharge_error:
 	if (charged)
 		vm_unacct_memory(charged);
 	return error;
@@ -1504,18 +1523,18 @@ int expand_upwards(struct vm_area_struct
 	
 	if (over_stack_limit(address - vma->vm_start) ||
 			((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
-			current->rlim[RLIMIT_AS].rlim_cur) {
-		anon_vma_unlock(vma);
-		vm_unacct_memory(grow);
-		return -ENOMEM;
-	}
+			current->rlim[RLIMIT_AS].rlim_cur)
+		goto out_nomem;
+
 	if ((vma->vm_flags & VM_LOCKED) && !capable(CAP_IPC_LOCK) &&
 			((vma->vm_mm->locked_vm + grow) << PAGE_SHIFT) >
-			current->rlim[RLIMIT_MEMLOCK].rlim_cur) {
-		anon_vma_unlock(vma);
-		vm_unacct_memory(grow);
-		return -ENOMEM;
-	}
+			current->rlim[RLIMIT_MEMLOCK].rlim_cur)
+		goto out_nomem;
+
+	if (ub_memory_charge(mm_ub(vma->vm_mm), address - vma->vm_end,
+				vma->vm_flags, vma->vm_file, UB_SOFT))
+		goto out_nomem;
+
 	vma->vm_end = address;
 	vma->vm_mm->total_vm += grow;
 	if (vma->vm_flags & VM_LOCKED)
@@ -1523,6 +1542,11 @@ int expand_upwards(struct vm_area_struct
 	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
 	anon_vma_unlock(vma);
 	return 0;
+
+out_nomem:
+	anon_vma_unlock(vma);
+	vm_unacct_memory(grow);
+	return -ENOMEM;
 }
 #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
 
@@ -1597,18 +1621,18 @@ int expand_stack(struct vm_area_struct *
 	
 	if (over_stack_limit(vma->vm_end - address) ||
 			((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
-			current->rlim[RLIMIT_AS].rlim_cur) {
-		anon_vma_unlock(vma);
-		vm_unacct_memory(grow);
-		return -ENOMEM;
-	}
+			current->rlim[RLIMIT_AS].rlim_cur)
+		goto out_nomem;
+
 	if ((vma->vm_flags & VM_LOCKED) && !capable(CAP_IPC_LOCK) &&
 			((vma->vm_mm->locked_vm + grow) << PAGE_SHIFT) >
-			current->rlim[RLIMIT_MEMLOCK].rlim_cur) {
-		anon_vma_unlock(vma);
-		vm_unacct_memory(grow);
-		return -ENOMEM;
-	}
+			current->rlim[RLIMIT_MEMLOCK].rlim_cur)
+		goto out_nomem;
+
+	if (ub_memory_charge(mm_ub(vma->vm_mm), vma->vm_start - address,
+				vma->vm_flags, vma->vm_file, UB_SOFT))
+		goto out_nomem;
+
 	vma->vm_start = address;
 	vma->vm_pgoff -= grow;
 	vma->vm_mm->total_vm += grow;
@@ -1617,6 +1641,11 @@ int expand_stack(struct vm_area_struct *
 	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
 	anon_vma_unlock(vma);
 	return 0;
+
+out_nomem:
+	anon_vma_unlock(vma);
+	vm_unacct_memory(grow);
+	return -ENOMEM;
 }
 
 struct vm_area_struct *
@@ -1790,6 +1819,7 @@ detach_vmas_to_be_unmapped(struct mm_str
 	tail_vma->vm_next = NULL;
 	mm->mmap_cache = NULL;		/* Kill the cache. */
 }
+EXPORT_SYMBOL(split_vma);
 
 /*
  * __split_vma() bypasses sysctl_max_map_count checking.  We use this on the
@@ -1951,7 +1981,7 @@ asmlinkage long sys_munmap(unsigned long
  *  anonymous maps.  eventually we may be able to do some
  *  brk-specific accounting here.
  */
-unsigned long do_brk(unsigned long addr, unsigned long len)
+static unsigned long __do_brk(unsigned long addr, unsigned long len, int lowpri)
 {
 	struct mm_struct * mm = current->mm;
 	struct vm_area_struct * vma, * prev;
@@ -2013,6 +2043,10 @@ unsigned long do_brk(unsigned long addr,
 	if (security_vm_enough_memory(len >> PAGE_SHIFT))
 		return -ENOMEM;
 
+	if (ub_memory_charge(mm_ub(mm), len, flags, NULL, lowpri))
+		goto out_unacct;
+		
+
 	/* Can we just expand an old private anonymous mapping? */
 	if (vma_merge(mm, prev, addr, addr + len, flags,
 					NULL, NULL, pgoff, NULL))
@@ -2021,8 +2055,11 @@ unsigned long do_brk(unsigned long addr,
 	/*
 	 * create a vma struct for an anonymous mapping
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_alloc(vm_area_cachep,
+			SLAB_KERNEL | (lowpri ? 0 : __GFP_SOFT_UBC));
 	if (!vma) {
+		ub_memory_uncharge(mm_ub(mm), len, flags, NULL);
+out_unacct:
 		vm_unacct_memory(len >> PAGE_SHIFT);
 		return -ENOMEM;
 	}
@@ -2044,6 +2081,11 @@ out:
 	return addr;
 }
 
+unsigned long do_brk(unsigned long addr, unsigned long len)
+{
+	return __do_brk(addr, len, UB_SOFT);
+}
+
 EXPORT_SYMBOL(do_brk);
 
 /* locking version of do_brk. */
@@ -2052,7 +2094,7 @@ unsigned long do_brk_locked(unsigned lon
 	unsigned long ret;
 
 	down_write(&current->mm->mmap_sem);
-	ret = do_brk(addr, len);
+	ret = __do_brk(addr, len, UB_SOFT);
 	up_write(&current->mm->mmap_sem);
 
 	return ret;
diff -Nurap linux-2.6.9-100.orig/mm/mprotect.c linux-2.6.9-ve023stab054/mm/mprotect.c
--- linux-2.6.9-100.orig/mm/mprotect.c	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/mprotect.c	2011-06-15 19:26:21.000000000 +0400
@@ -18,6 +18,7 @@
 #include <linux/security.h>
 #include <linux/mempolicy.h>
 #include <linux/personality.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -25,6 +26,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 static inline void
 change_pte_range(pmd_t *pmd, unsigned long address,
 		unsigned long size, pgprot_t newprot)
@@ -116,6 +119,8 @@ mprotect_fixup(struct vm_area_struct *vm
 {
 	struct mm_struct * mm = vma->vm_mm;
 	unsigned long charged = 0, old_end = vma->vm_end;
+	unsigned long vma_rss;
+	int prot_dir;
 	pgprot_t newprot;
 	unsigned int oldflags;
 	pgoff_t pgoff;
@@ -126,6 +131,17 @@ mprotect_fixup(struct vm_area_struct *vm
 		return 0;
 	}
 
+	spin_lock(&mm->page_table_lock);
+	vma_rss = pages_in_vma_range(vma, start, end);
+	spin_unlock(&mm->page_table_lock);
+	charged = ((end - start) >> PAGE_SHIFT);
+
+	prot_dir = ub_protected_charge(mm_ub(mm), charged - vma_rss,
+			newflags, vma);
+	error = -ENOMEM;
+	if (prot_dir == PRIVVM_ERROR)
+		goto fail_nocharge;
+
 	/*
 	 * If we make a private mapping writable we increase our commit;
 	 * but (without finer accounting) cannot reduce our commit if we
@@ -136,9 +152,8 @@ mprotect_fixup(struct vm_area_struct *vm
 	 */
 	if (newflags & VM_WRITE) {
 		if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
-			charged = (end - start) >> PAGE_SHIFT;
 			if (security_vm_enough_memory(charged))
-				return -ENOMEM;
+				goto fail_noacct;
 			newflags |= VM_ACCOUNT;
 		}
 	}
@@ -185,11 +200,17 @@ success:
 	if (oldflags & VM_EXEC)
 		arch_remove_exec_range(current->mm, old_end);
 	change_protection(vma, start, end, newprot);
+	if (prot_dir == PRIVVM_TO_SHARED)
+		__ub_unused_privvm_dec(mm_ub(mm), charged - vma_rss);
 	vm_stat_account(vma);
 	return 0;
 
 fail:
 	vm_unacct_memory(charged);
+fail_noacct:
+	if (prot_dir == PRIVVM_TO_PRIVATE)
+		__ub_unused_privvm_dec(mm_ub(mm), charged - vma_rss);
+fail_nocharge:
 	return error;
 }
 
@@ -294,3 +315,4 @@ out:
 	up_write(&current->mm->mmap_sem);
 	return error;
 }
+EXPORT_SYMBOL(sys_mprotect);
diff -Nurap linux-2.6.9-100.orig/mm/mremap.c linux-2.6.9-ve023stab054/mm/mremap.c
--- linux-2.6.9-100.orig/mm/mremap.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/mremap.c	2011-06-15 19:26:19.000000000 +0400
@@ -21,6 +21,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
@@ -81,6 +83,7 @@ static inline pte_t *alloc_one_pte_map(s
 
 static int
 move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
+		struct vm_area_struct *new_vma,
 		unsigned long new_addr)
 {
 	struct address_space *mapping = NULL;
@@ -143,6 +146,7 @@ move_one_page(struct vm_area_struct *vma
 }
 
 static unsigned long move_page_tables(struct vm_area_struct *vma,
+		struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long old_addr,
 		unsigned long len)
 {
@@ -156,7 +160,8 @@ static unsigned long move_page_tables(st
 	 * only a few pages.. This also makes error recovery easier.
 	 */
 	for (offset = 0; offset < len; offset += PAGE_SIZE) {
-		if (move_one_page(vma, old_addr+offset, new_addr+offset) < 0)
+		if (move_one_page(vma, old_addr+offset, 
+					new_vma, new_addr+offset) < 0)
 			break;
 		cond_resched();
 	}
@@ -175,26 +180,29 @@ static unsigned long move_vma(struct vm_
 	unsigned long excess = 0;
 	int split = 0;
 
+	if (ub_memory_charge(mm_ub(mm), new_len, vma->vm_flags,
+				vma->vm_file, UB_HARD))
+		return -ENOMEM;
 	/*
 	 * We'd prefer to avoid failure later on in do_munmap:
 	 * which may split one vma into three before unmapping.
 	 */
 	if (mm->map_count >= sysctl_max_map_count - 3)
-		return -ENOMEM;
+		goto out_nomem;
 
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
 	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
 	if (!new_vma)
-		return -ENOMEM;
+		goto out_nomem;
 
-	moved_len = move_page_tables(vma, new_addr, old_addr, old_len);
+	moved_len = move_page_tables(vma, new_vma, new_addr, old_addr, old_len);
 	if (moved_len < old_len) {
 		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
 		 * and then proceed to unmap new area instead of old.
 		 */
-		move_page_tables(new_vma, old_addr, new_addr, moved_len);
+		move_page_tables(new_vma, vma, old_addr, new_addr, moved_len);
 		vma = new_vma;
 		old_len = new_len;
 		old_addr = new_addr;
@@ -232,7 +240,12 @@ static unsigned long move_vma(struct vm_
 					   new_addr + new_len);
 	}
 
-	return new_addr;
+	if (new_addr != -ENOMEM)
+		return new_addr;
+
+out_nomem:
+	ub_memory_uncharge(mm_ub(mm), new_len, vma->vm_flags, vma->vm_file);
+	return -ENOMEM;
 }
 
 /*
@@ -360,6 +373,12 @@ unsigned long do_mremap(unsigned long ad
 		if (max_addr - addr >= new_len) {
 			int pages = (new_len - old_len) >> PAGE_SHIFT;
 
+			ret = ub_memory_charge(mm_ub(vma->vm_mm), 
+					new_len - old_len, vma->vm_flags, 
+					vma->vm_file, UB_HARD);
+			if (ret != 0)
+				goto out;
+
 			vma_adjust(vma, vma->vm_start,
 				addr + new_len, vma->vm_pgoff, NULL);
 
diff -Nurap linux-2.6.9-100.orig/mm/oom_kill.c linux-2.6.9-ve023stab054/mm/oom_kill.c
--- linux-2.6.9-100.orig/mm/oom_kill.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/oom_kill.c	2011-06-15 19:26:22.000000000 +0400
@@ -15,13 +15,25 @@
  *  kernel subsystems and hints as to where to find out what things do.
  */
 
+#include <linux/bitops.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
+#include <linux/virtinfo.h>
+#include <linux/module.h>
 #include <linux/swap.h>
 #include <linux/timex.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
+#include <linux/slab.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+
+spinlock_t oom_generation_lock = SPIN_LOCK_UNLOCKED;
+static DECLARE_WAIT_QUEUE_HEAD(oom_waitqh);
+int oom_kill_counter;
+int oom_generation;
 
 int oom_kill_enabled = 1;
 int sysctl_panic_on_oom;
@@ -54,7 +66,7 @@ static unsigned long badness(struct task
 	if (!p->mm)
 		return 0;
 
-	if (p->flags & PF_MEMDIE)
+	if (test_tsk_thread_flag(p, TIF_MEMDIE))
 		return 0;
 	/*
 	 * The memory size of the process is the basis for the badness.
@@ -116,25 +128,50 @@ static unsigned long badness(struct task
  *
  * (not docbooked, we don't want this one cluttering up the manual)
  */
-static struct task_struct * select_bad_process(void)
+static struct task_struct * select_bad_process(struct user_beancounter *ub)
 {
+	unsigned long points;
 	unsigned long maxpoints = 0;
 	struct task_struct *g, *p;
 	struct task_struct *chosen = NULL;
 	struct timespec uptime;
+	struct user_beancounter *mub;
 
 	do_posix_clock_monotonic_gettime(&uptime);
-	do_each_thread(g, p)
-		if (p->pid) {
-			unsigned long points = badness(p, uptime.tv_sec);
-			if (points > maxpoints) {
-				chosen = p;
-				maxpoints = points;
-			}
-			if (p->flags & PF_SWAPOFF)
-				return p;
+	do_each_thread_all(g, p) {
+		if (!p->pid)
+			continue;
+		if (!p->mm)
+			continue;
+
+#if 0
+		/*
+		 * swapoff check.
+		 * Pro: do not let opportunistic swapoff kill the whole system;
+		 * if the system enter OOM state, better stop swapoff.
+		 * Contra: essential services must survive without swap
+		 * (otherwise, the system is grossly misconfigured),
+		 * and disabling swapoff completely, with cryptic diagnostic
+		 * "interrupted system call", looks like a bad idea.
+		 * 2006/02/28 SAW
+		 */
+		if (!test_tsk_thread_flag(p, TIF_MEMDIE) &&
+			       (p->flags & PF_SWAPOFF))
+			return p;
+#endif
+
+		for (mub = mm_ub(p->mm); mub != NULL; mub = mub->parent)
+			if (mub == ub)
+				break;
+		if (mub != ub) /* wrong beancounter */
+			continue;
+
+		points = badness(p, uptime.tv_sec);
+		if (points > maxpoints) {
+			chosen = p;
+			maxpoints = points;
 		}
-	while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 	return chosen;
 }
 
@@ -153,15 +190,22 @@ static void __oom_kill_task(task_t *p)
 		return;
 	}
 	task_unlock(p);
-	printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", p->pid, p->comm);
+	printk(KERN_ERR "Out of Memory: Killing process %d (%.20s), flags=%lx, "
+	       "mm=%p gen=%d count=%d.\n",
+	       p->pid, p->comm, p->flags, p->mm,
+	       oom_generation, oom_kill_counter);
 
 	/*
 	 * We give our sacrificial lamb high priority and access to
 	 * all the memory it needs. That way it should be able to
 	 * exit() and clear out its resources quickly...
-	 */
-	p->time_slice = HZ;
-	p->flags |= PF_MEMALLOC | PF_MEMDIE;
+	 *
+	 * This code used to be a mainstream bug.
+	 * It won't work for p != current.  20060607  SAW
+	 */
+	if (p == current)
+		p->time_slice = HZ;
+	set_tsk_thread_flag(p, TIF_MEMDIE);
 
 	/* This process has hardware access, be more careful. */
 	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {
@@ -171,28 +215,90 @@ static void __oom_kill_task(task_t *p)
 	}
 }
 
-static struct mm_struct *oom_kill_task(task_t *p)
+/**
+ * oom_kill - do a complete job of killing a process
+ *
+ * Returns TRUE if selected process is unkillable.
+ * Called with oom_generation_lock and tasklist_lock held, drops them.
+ */
+static int oom_kill(struct task_struct *p,
+		struct user_beancounter *ub, long ub_maxover)
 {
-	struct mm_struct *mm = get_task_mm(p);
-	if (!mm || mm == &init_mm)
-		return NULL;
+	struct mm_struct *mm;
+	struct task_struct *g, *q;
+	uid_t ub_uid;
+	int suicide;
+
+	mm = get_task_mm(p);
+	if (mm == &init_mm) {
+		mmput(mm);
+		mm = NULL;
+	}
+	if (mm == NULL)
+		return -1;
+
+	/*
+	 * The following message showing mm, its size, and free space
+	 * should be printed regardless of CONFIG_USER_RESOURCE.
+	 */
+	ub_uid = (ub ? ub->ub_uid : -1);
+	printk(KERN_INFO"MM to kill %p (UB=%d, UBover=%ld, VM=%lu, free=%u).\n",
+			mm, ub_uid, ub_maxover,
+			mm->total_vm, nr_free_pages());
+
+	/*
+	 * kill all processes that share the ->mm (i.e. all threads),
+	 * but are in a different thread group
+	 */
+	suicide = 0;
+
+	/* oom_generation_lock must be held */
+	oom_kill_counter++;
+
 	__oom_kill_task(p);
-	return mm;
-}
+	if (p == current)
+		suicide = 1;
+	do_each_thread_all(g, q) {
+		if (q->mm == mm && q->tgid != p->tgid) {
+			__oom_kill_task(q);
+			if (q == current)
+				suicide = 1;
+		}
+	} while_each_thread_all(g, q);
+	read_unlock(&tasklist_lock);
 
+	wake_up_all(&oom_waitqh);
+	spin_unlock(&oom_generation_lock);
+	ub_oomkill_task(mm, ub, ub_maxover); /* nonblocking but long */
+	mmput(mm);
+
+	/*
+	 * Make kswapd go out of the way, so "p" has a good chance of
+	 * killing itself before someone else gets the chance to ask
+	 * for more memory.
+	 */
+	if (!suicide)
+		yield();
+
+	return 0;
+}
 
 /**
- * oom_kill - kill the "best" process when we run out of memory
+ * oom_select_and_kill - kill the "best" process when we run out of memory
  *
  * If we run out of memory, we have the choice between either
  * killing a random task (bad), letting the system crash (worse)
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
+ *
+ * Called with oom_generation_lock held, drops it.
  */
-static void oom_kill(void)
+static void oom_select_and_kill(void)
 {
-	struct mm_struct *mm;
-	struct task_struct *g, *p, *q;
+	struct user_beancounter *ub;
+	struct task_struct *p;
+	long ub_maxover;
+	int r;
 
 	/* print the memory stats whenever we OOM kill */
 	show_mem();
@@ -205,39 +311,29 @@ static void oom_kill(void)
 	if (sysctl_panic_on_oom)
 		panic("out of memory. panic_on_oom is selected\n");
 
+	ub_clear_oom();
+
 	read_lock(&tasklist_lock);
 retry:
-	p = select_bad_process();
+	ub = ub_select_worst(&ub_maxover);
+	p = select_bad_process(ub);
 
 	/* Found nothing?!?! Either we hang forever, or we panic. */
 	if (!p) {
-		show_free_areas();
-		panic("Out of memory and no killable processes...\n");
-	}
+		if (!ub) {
+			show_slab_info();
+			show_free_areas();
+			panic("Out of memory and no killable processes...\n");
+		}
 
-	mm = oom_kill_task(p);
-	if (!mm)
+		put_beancounter(ub);
 		goto retry;
-	/*
-	 * kill all processes that share the ->mm (i.e. all threads),
-	 * but are in a different thread group
-	 */
-	do_each_thread(g, q)
-		if (q->mm == mm && q->tgid != p->tgid)
-			__oom_kill_task(q);
-	while_each_thread(g, q);
-	if (!p->mm)
-		printk(KERN_INFO "Fixed up OOM kill of mm-less task\n");
-	read_unlock(&tasklist_lock);
-	mmput(mm);
+	}
 
-	/*
-	 * Make kswapd go out of the way, so "p" has a good chance of
-	 * killing itself before someone else gets the chance to ask
-	 * for more memory.
-	 */
-	yield();
-	return;
+	r = oom_kill(p, ub, ub_maxover);
+	put_beancounter(ub);
+	if (r)
+		goto retry;
 }
 
 static struct notifier_block *oom_notify_list;
@@ -254,82 +350,118 @@ int unregister_oom_notifier(struct notif
 }
 EXPORT_SYMBOL_GPL(unregister_oom_notifier);
 
-/**
- * out_of_memory - is the system out of memory?
+/*
+ * Called with oom_generation_lock held, drops it.
  */
-void out_of_memory(int gfp_mask)
+void oom_select_and_kill_sc(struct user_beancounter *scope)
 {
-	/*
-	 * oom_lock protects out_of_memory()'s static variables.
-	 * It's a global lock; this is not performance-critical.
-	 */
-	static spinlock_t oom_lock = SPIN_LOCK_UNLOCKED;
-	static unsigned long first, last, count, lastkill;
-	unsigned long now, since;
-	unsigned long freed = 0;
-
-	notifier_call_chain(&oom_notify_list, 0, &freed);
-	if (freed > 0)
-		/* Got some memory back in the last second. */
+	struct user_beancounter *ub;
+	struct task_struct *p;
+	struct mm_struct *mm;
+
+	ub_clear_oom();
+	ub = get_beancounter(scope);
+
+	read_lock(&tasklist_lock);
+retry:
+	p = select_bad_process(ub);
+	if (!p) {
+		read_unlock(&tasklist_lock);
 		return;
+	}
 
-	spin_lock(&oom_lock);
-	now = jiffies;
-	since = now - last;
-	last = now;
+	if (oom_kill(p, ub, 0))
+		goto retry;
 
-	/*
-	 * If it's been a long time since last failure,
-	 * we're not oom.
-	 */
-	if (since > 5*HZ)
-		goto reset;
+	put_beancounter(ub);
+}
 
-	/*
-	 * If we haven't tried for at least one second,
-	 * we're not really oom.
-	 */
-	since = now - first;
-	if (since < HZ)
+static void do_out_of_memory(struct oom_freeing_stat *stat)
+{
+	spin_lock(&oom_generation_lock);
+	if (oom_generation != stat->oom_generation)
+		/* OOM-killed process has exited */
 		goto out_unlock;
-
-	/*
-	 * If we have gotten only a few failures,
-	 * we're not really oom. 
-	 */
-	if (++count < 10)
+	if (test_tsk_thread_flag(current, TIF_MEMDIE))
+		/* We have been killed. Move out faster */
 		goto out_unlock;
 
-	/*
-	 * If we just killed a process, wait a while
-	 * to give that task a chance to exit. This
-	 * avoids killing multiple processes needlessly.
-	 */
-	since = now - lastkill;
-	if (since < HZ*5)
-		goto out_unlock;
+	if (oom_kill_counter) {
+		long timeout;
 
-	/*
-	 * Ok, really out of memory. Kill something.
-	 */
-	lastkill = now;
+		/* OOM in progress */
 
-	printk("oom-killer: gfp_mask=0x%x\n", gfp_mask);
+		DECLARE_WAITQUEUE(wait, current);
+		add_wait_queue(&oom_waitqh, &wait);
 
-	/* oom_kill() sleeps */
-	spin_unlock(&oom_lock);
-	oom_kill();
-	spin_lock(&oom_lock);
+		timeout = 5*HZ;
+		do {
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			if (test_tsk_thread_flag(current, TIF_MEMDIE)) {
+				__set_current_state(TASK_RUNNING);
+				remove_wait_queue(&oom_waitqh, &wait);
+				/* We have been killed. Move out faster */
+				goto out_unlock;
+			}
+			spin_unlock(&oom_generation_lock);
 
-reset:
-	/*
-	 * We dropped the lock above, so check to be sure the variable
-	 * first only ever increases to prevent false OOM's.
-	 */
-	if (time_after(now, first))
-		first = now;
-	count = 0;
+			timeout = schedule_timeout(timeout);
+
+			spin_lock(&oom_generation_lock);
+		} while (timeout > 0);
+
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&oom_waitqh, &wait);
+
+		if (oom_generation != stat->oom_generation)
+			goto out_unlock;
+
+		/*
+		 * Some process is stuck exiting.
+		 * No choice other than to kill something else.
+		 */
+	}
+	oom_select_and_kill();
+	return;
 
 out_unlock:
-	spin_unlock(&oom_lock);
+	spin_unlock(&oom_generation_lock);
+}
+
+void do_out_of_memory_sc(struct user_beancounter *ub)
+{
+	spin_lock(&oom_generation_lock);
+	oom_select_and_kill_sc(ub);
+}
+EXPORT_SYMBOL(do_out_of_memory_sc);
+
+/**
+ * out_of_memory - is the system out of memory?
+ */
+void out_of_memory(struct oom_freeing_stat *stat, int gfp_mask)
+{
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_OUTOFMEM, stat)
+			& (NOTIFY_OK | NOTIFY_FAIL))
+		return;
+
+	if (nr_swap_pages > 0) {
+		/* some pages have been freed */
+		if (stat->freed)
+			return;
+		/* some IO was started */
+		if (stat->written)
+			return;
+		/* some pages have been swapped out, ref. counter removed */
+		if (stat->swapped)
+			return;
+		/* some slabs were shrinked */
+		if (stat->slabs)
+			return;
+	}
+
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_OUTOFMEM, stat)
+			& (NOTIFY_OK | NOTIFY_FAIL))
+  		return;
+
+	do_out_of_memory(stat);
 }
diff -Nurap linux-2.6.9-100.orig/mm/page_alloc.c linux-2.6.9-ve023stab054/mm/page_alloc.c
--- linux-2.6.9-100.orig/mm/page_alloc.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/page_alloc.c	2011-06-15 19:26:22.000000000 +0400
@@ -31,9 +31,12 @@
 #include <linux/topology.h>
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
+#include <linux/kernel_stat.h>
 
 #include <asm/tlbflush.h>
 
+#include <ub/ub_mem.h>
+
 DECLARE_BITMAP(node_online_map, MAX_NUMNODES);
 struct pglist_data *pgdat_list;
 unsigned long totalram_pages;
@@ -42,7 +45,9 @@ long nr_swap_pages;
 int percpu_pagelist_fraction;
 int numnodes = 1;
 int sysctl_lower_zone_protection = 0;
+int alloc_fail_warn = 0;
 
+EXPORT_SYMBOL(pgdat_list);
 EXPORT_SYMBOL(totalram_pages);
 EXPORT_SYMBOL(nr_swap_pages);
 
@@ -285,6 +290,7 @@ void __free_pages_ok(struct page *page, 
 		free_pages_check(__FUNCTION__, page + i);
 	list_add(&page->lru, &list);
 	kernel_map_pages(page, 1<<order, 0);
+	ub_page_uncharge(page, order);
 	free_pages_bulk(page_zone(page), 1, &list, order);
 }
 
@@ -357,7 +363,7 @@ static void prep_new_page(struct page *p
 
 	page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
 			1 << PG_referenced | 1 << PG_arch_1 |
-			1 << PG_checked | 1 << PG_mappedtodisk);
+			1 << PG_checked | 1 << PG_mappedtodisk | 1 << PG_checkpointed);
 	page->private = 0;
 	set_page_refs(page, order);
 }
@@ -517,6 +523,7 @@ static void fastcall free_hot_cold_page(
 		page->mapping = NULL;
 	free_pages_check(__FUNCTION__, page);
 	pcp = &zone->pageset[get_cpu()].pcp[cold];
+	ub_page_uncharge(page, 0);
 	local_irq_save(flags);
 	if (pcp->count >= pcp->high)
 		pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
@@ -582,6 +589,26 @@ buffered_rmqueue(struct zone *zone, int 
 	return page;
 }
 
+static void __alloc_collect_stats(unsigned int gfp_mask,
+			unsigned int order, struct page *page, cycles_t time)
+{
+	int ind;
+	unsigned long flags;
+
+	time = (jiffies - time) * cycles_per_jiffy;
+	if (!(gfp_mask & __GFP_WAIT))
+		ind = 0;
+	else if (!(gfp_mask & __GFP_HIGHMEM))
+		ind = (order > 0 ? 2 : 1);
+	else
+		ind = (order > 0 ? 4 : 3);
+	spin_lock_irqsave(&kstat_glb_lock, flags);
+	KSTAT_LAT_ADD(&kstat_glob.alloc_lat[ind], time);
+	if (!page)
+		kstat_glob.alloc_fails[ind]++;
+	spin_unlock_irqrestore(&kstat_glb_lock, flags);
+}
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  *
@@ -612,6 +639,7 @@ __alloc_pages(unsigned int gfp_mask, uns
 	int alloc_type;
 	int do_retry;
 	int can_try_harder;
+	cycles_t start_time;
 
 	might_sleep_if(wait);
 
@@ -629,6 +657,7 @@ __alloc_pages(unsigned int gfp_mask, uns
 		return NULL;
 	}
 
+	start_time = jiffies;
 	alloc_type = zone_idx(zones[0]);
 
 	/* Go through the zonelist once, looking for a zone with enough free */
@@ -666,14 +695,20 @@ __alloc_pages(unsigned int gfp_mask, uns
 			goto got_pg;
 	}
 
+rebalance:
 	/* This allocation should allow future memory freeing. */
-	if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
+	if (((p->flags & PF_MEMALLOC) || test_tsk_thread_flag(p, TIF_MEMDIE)) &&
+			!in_interrupt()) {
 		/* go through the zonelist yet again, ignoring mins */
 		for (i = 0; (z = zones[i]) != NULL; i++) {
 			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
+		if (gfp_mask & __GFP_NOFAIL) {
+			blk_congestion_wait(WRITE, HZ/50);
+			goto rebalance;
+		}
 		goto nopage;
 	}
 
@@ -681,9 +716,8 @@ __alloc_pages(unsigned int gfp_mask, uns
 	if (!wait)
 		goto nopage;
 
-rebalance:
 	/* We now go into synchronous reclaim */
-	if ((p->flags & PF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
+	if (test_tsk_thread_flag(p, TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
 		goto nopage;
 	p->flags |= PF_MEMALLOC;
 	reclaim_state.reclaimed_slab = 0;
@@ -724,17 +758,26 @@ rebalance:
 	}
 
 nopage:
-	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
+	if (alloc_fail_warn && !(gfp_mask & __GFP_NOWARN)
+		&& printk_ratelimit()) {
 		printk(KERN_WARNING "%s: page allocation failure."
 			" order:%d, mode:0x%x\n",
 			p->comm, order, gfp_mask);
 		dump_stack();
 		show_mem();
 	}
+	__alloc_collect_stats(gfp_mask, order, NULL, start_time);
 	return NULL;
 got_pg:
 	zone_statistics(zonelist, z);
 	kernel_map_pages(page, 1 << order, 1);
+	__alloc_collect_stats(gfp_mask, order, page, start_time);
+
+	if (ub_page_charge(page, order, gfp_mask)) {
+		__free_pages(page, order);
+		page = NULL;
+	}
+
 	return page;
 }
 
@@ -882,6 +925,30 @@ unsigned int nr_free_highpages (void)
 }
 #endif
 
+unsigned int nr_lowmem_pages(void)
+{
+	pg_data_t *pgdat;
+	unsigned int pages = 0;
+
+	for_each_pgdat(pgdat)
+		pages += pgdat->node_zones[ZONE_NORMAL].present_pages;
+
+	return pages;
+}
+EXPORT_SYMBOL(nr_lowmem_pages);
+
+
+unsigned int nr_free_lowpages (void)
+{
+	pg_data_t *pgdat;
+	unsigned int pages = 0;
+
+	for_each_pgdat(pgdat)
+		pages += pgdat->node_zones[ZONE_NORMAL].free_pages;
+
+	return pages;
+}
+
 #ifdef CONFIG_NUMA
 static void show_node(struct zone *zone)
 {
@@ -1121,7 +1188,7 @@ void show_free_areas(void)
 
 	for_each_zone(zone) {
 		struct list_head *elem;
- 		unsigned long nr, flags, order, total = 0;
+ 		unsigned long nr[MAX_ORDER], flags, order, total = 0;
 
 		show_node(zone);
 		printk("%s: ", zone->name);
@@ -1130,15 +1197,16 @@ void show_free_areas(void)
 			continue;
 		}
 
+		spin_lock_irqsave(&zone->lock, flags);
 		for (order = 0; order < MAX_ORDER; order++) {
-			nr = 0;
-			spin_lock_irqsave(&zone->lock, flags);
+			nr[order] = 0;
 			list_for_each(elem, &zone->free_area[order].free_list)
-				++nr;
-			spin_unlock_irqrestore(&zone->lock, flags);
-			total += nr << order;
-			printk("%lu*%lukB ", nr, K(1UL) << order);
+				++nr[order];
+			total += nr[order] << order;
 		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+		for (order = 0; order < MAX_ORDER; order++)
+			printk("%lu*%lukB ", nr[order], K(1UL) << order);
 		printk("= %lukB\n", K(total));
 	}
 
@@ -1779,7 +1847,10 @@ static void *vmstat_start(struct seq_fil
 	m->private = ps;
 	if (!ps)
 		return ERR_PTR(-ENOMEM);
-	get_full_page_state(ps);
+	if (ve_is_super(get_exec_env()))
+		get_full_page_state(ps);
+	else
+		memset(ps, 0, sizeof(*ps));
 	ps->pgpgin /= 2;		/* sectors -> kbytes */
 	ps->pgpgout /= 2;
 	return (unsigned long *)ps + *pos;
diff -Nurap linux-2.6.9-100.orig/mm/pdflush.c linux-2.6.9-ve023stab054/mm/pdflush.c
--- linux-2.6.9-100.orig/mm/pdflush.c	2004-10-19 01:55:36.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/pdflush.c	2011-06-15 19:26:18.000000000 +0400
@@ -106,8 +106,8 @@ static int __pdflush(struct pdflush_work
 		spin_unlock_irq(&pdflush_lock);
 
 		schedule();
-		if (current->flags & PF_FREEZE) {
-			refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE)) {
+			refrigerator();
 			spin_lock_irq(&pdflush_lock);
 			continue;
 		}
diff -Nurap linux-2.6.9-100.orig/mm/rmap.c linux-2.6.9-ve023stab054/mm/rmap.c
--- linux-2.6.9-100.orig/mm/rmap.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/rmap.c	2011-06-15 19:26:22.000000000 +0400
@@ -52,6 +52,7 @@
 #include <linux/init.h>
 #include <linux/rmap.h>
 #include <linux/rcupdate.h>
+#include <linux/module.h>
 
 #include <asm/tlbflush.h>
 
@@ -73,6 +74,8 @@ static inline void dec_mapped_high(struc
 #define dec_mapped_high(page)
 #endif
 
+#include <ub/ub_vmpages.h>
+
 //#define RMAP_DEBUG /* can be enabled only for debugging */
 
 kmem_cache_t *anon_vma_cachep;
@@ -134,6 +137,7 @@ int anon_vma_prepare(struct vm_area_stru
 	}
 	return 0;
 }
+EXPORT_SYMBOL(anon_vma_prepare);
 
 void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
 {
@@ -169,6 +173,7 @@ void anon_vma_link(struct vm_area_struct
 		spin_unlock(&anon_vma->lock);
 	}
 }
+EXPORT_SYMBOL(anon_vma_link);
 
 void anon_vma_unlink(struct vm_area_struct *vma)
 {
@@ -204,7 +209,8 @@ static void anon_vma_ctor(void *data, km
 void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
-			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor, NULL);
+			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_UBC,
+			anon_vma_ctor, NULL);
 }
 
 /*
@@ -489,24 +495,42 @@ void page_add_file_rmap(struct page *pag
 	}
 }
 
+static void page_mapcount_bug(struct page *page, int mc,
+		struct vm_area_struct *vma)
+{
+	struct address_space *mapping;
+
+	printk(KERN_EMERG "BUG: Page %p mapcount went negative %d\n", page, mc);
+	printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
+		(int)(2*sizeof(page_flags_t)), (unsigned long)page->flags,
+		page->mapping, page_mapcount(page), page_count(page));
+
+	mapping = page_mapping(page);
+	if (mapping)
+		printk(KERN_EMERG "mapping %p (ops %p)\n",
+				mapping, mapping->a_ops);
+
+	printk(KERN_EMERG "VMA: %p, flags %0lx, ops %p\n",
+			vma, vma->vm_flags, vma->vm_ops);
+	BUG();
+}
+
 /**
  * page_remove_rmap - take down pte mapping from a page
  * @page: page to remove mapping from
  *
  * Caller needs to hold the mm->page_table_lock.
  */
-void page_remove_rmap(struct page *page)
+void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 {
+	int mc;
 	BUG_ON(PageReserved(page));
 
 	if (atomic_add_negative(-1, &page->_mapcount)) {
-		if (unlikely(page_mapcount(page) < 0)) {
-			printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
-			printk (KERN_EMERG "  page->flags = %lx\n", page->flags);
-			printk (KERN_EMERG "  page->count = %x\n", page_count(page));
-			printk (KERN_EMERG "  page->mapping = %p\n", page->mapping);
-			BUG();
-		}
+		mc = page_mapcount(page);
+		if (unlikely(mc < 0))
+			page_mapcount_bug(page, mc, vma);
+
 		/*
 		 * It would be tidy to reset the PageAnon mapping here,
 		 * but that might overwrite a racing page_add_anon_rmap
@@ -518,6 +542,10 @@ void page_remove_rmap(struct page *page)
 		 */
 		if (page_test_and_clear_dirty(page))
 			set_page_dirty(page);
+		/* Well, when a page is unmapped, we cannot keep PG_checkpointed flag,
+		 * it is not accessible via process VM and we have no way to
+		 * reset its state */
+		clear_bit(PG_checkpointed, &(page)->flags);
 		dec_page_state(nr_mapped);
 		dec_mapped_high(page);
 
@@ -624,7 +652,10 @@ static int try_to_unmap_one(struct page 
 	}
 
 	mm->rss--;
-	page_remove_rmap(page);
+	mm_ub(mm)->ub_perfstat[smp_processor_id()].unmap++;
+	ub_unused_privvm_inc(mm_ub(mm), 1, vma);
+	pb_remove_ref(page, mm_ub(mm));
+	page_remove_rmap(page, vma);
 	page_cache_release(page);
 
 out_unmap:
@@ -669,6 +700,7 @@ static void try_to_unmap_cluster(unsigne
 	unsigned long address;
 	unsigned long end;
 	unsigned long pfn;
+	unsigned long old_rss;
 
 	/*
 	 * We need the page_table_lock to protect us from page faults,
@@ -691,6 +723,7 @@ static void try_to_unmap_cluster(unsigne
 	if (!pmd_present(*pmd))
 		goto out_unlock;
 
+	old_rss = mm->rss;
 	for (original_pte = pte = pte_offset_map(pmd, address);
 			address < end; pte++, address += PAGE_SIZE) {
 
@@ -721,11 +754,15 @@ static void try_to_unmap_cluster(unsigne
 		if (pte_dirty(pteval))
 			set_page_dirty(page);
 
-		page_remove_rmap(page);
-		page_cache_release(page);
+		page_remove_rmap(page, vma);
 		mm->rss--;
+		mm_ub(mm)->ub_perfstat[smp_processor_id()].unmap++;
+		pb_remove_ref(page, mm_ub(mm));
+		page_cache_release(page);
 		(*mapcount)--;
 	}
+	if (old_rss > mm->rss)
+		ub_unused_privvm_inc(mm_ub(mm), old_rss - mm->rss, vma);
 
 	pte_unmap(original_pte);
 
diff -Nurap linux-2.6.9-100.orig/mm/shmem.c linux-2.6.9-ve023stab054/mm/shmem.c
--- linux-2.6.9-100.orig/mm/shmem.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/shmem.c	2011-06-15 19:26:21.000000000 +0400
@@ -50,6 +50,9 @@
 #include <asm/div64.h>
 #include <asm/pgtable.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+
 /* This magic number is used in glibc for posix shared memory */
 #define TMPFS_MAGIC	0x01021994
 
@@ -212,7 +215,7 @@ static void shmem_free_block(struct inod
  *
  * It has to be called with the spinlock held.
  */
-static void shmem_recalc_inode(struct inode *inode)
+static void shmem_recalc_inode(struct inode *inode, unsigned long swp_freed)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	long freed;
@@ -222,6 +225,9 @@ static void shmem_recalc_inode(struct in
 		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 		info->alloced -= freed;
 		shmem_unacct_blocks(info->flags, freed);
+		if (freed > swp_freed)
+			ub_tmpfs_respages_dec(shm_info_ub(info), 
+					freed - swp_freed);
 		if (sbinfo) {
 			spin_lock(&sbinfo->stat_lock);
 			sbinfo->free_blocks += freed;
@@ -331,6 +337,11 @@ static void shmem_swp_set(struct shmem_i
 	info->swapped += incdec;
 	if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT)
 		kmap_atomic_to_page(entry)->nr_swapped += incdec;
+
+	if (incdec == 1)
+		ub_tmpfs_respages_dec(shm_info_ub(info), 1);
+	else
+		ub_tmpfs_respages_inc(shm_info_ub(info), 1);
 }
 
 /*
@@ -347,14 +358,24 @@ static swp_entry_t *shmem_swp_alloc(stru
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	struct page *page = NULL;
 	swp_entry_t *entry;
+	unsigned long ub_val;
 
 	if (sgp != SGP_WRITE &&
 	    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
 		return ERR_PTR(-EINVAL);
 
+	ub_val = 0;
+	if (info->next_index <= index) {
+		ub_val = index + 1 - info->next_index;
+		if (ub_shmpages_charge(shm_info_ub(info), ub_val))
+			return ERR_PTR(-ENOSPC);
+	}
+
 	while (!(entry = shmem_swp_entry(info, index, &page))) {
-		if (sgp == SGP_READ)
-			return shmem_swp_map(ZERO_PAGE(0));
+		if (sgp == SGP_READ) {
+			entry = shmem_swp_map(ZERO_PAGE(0));
+			goto out;
+		}
 		/*
 		 * Test free_blocks against 1 not 0, since we have 1 data
 		 * page (and perhaps indirect index pages) yet to allocate:
@@ -364,7 +385,8 @@ static swp_entry_t *shmem_swp_alloc(stru
 			spin_lock(&sbinfo->stat_lock);
 			if (sbinfo->free_blocks <= 1) {
 				spin_unlock(&sbinfo->stat_lock);
-				return ERR_PTR(-ENOSPC);
+				entry = ERR_PTR(-ENOSPC);
+				goto out;
 			}
 			sbinfo->free_blocks--;
 			inode->i_blocks += BLOCKS_PER_PAGE;
@@ -372,7 +394,8 @@ static swp_entry_t *shmem_swp_alloc(stru
 		}
 
 		spin_unlock(&info->lock);
-		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
+		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | 
+				__GFP_UBC);
 		if (page) {
 			clear_highpage(page);
 			page->nr_swapped = 0;
@@ -380,25 +403,36 @@ static swp_entry_t *shmem_swp_alloc(stru
 		spin_lock(&info->lock);
 
 		if (!page) {
-			shmem_free_block(inode);
-			return ERR_PTR(-ENOMEM);
+			entry = ERR_PTR(-ENOMEM);
+			goto out_block;
 		}
 		if (sgp != SGP_WRITE &&
 		    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 			entry = ERR_PTR(-EINVAL);
-			break;
+			goto out_page;
 		}
-		if (info->next_index <= index)
+		if (info->next_index <= index) {
+			ub_val = 0;
 			info->next_index = index + 1;
+		}
 	}
 	if (page) {
 		/* another task gave its page, or truncated the file */
 		shmem_free_block(inode);
 		shmem_dir_free(page);
 	}
-	if (info->next_index <= index && !IS_ERR(entry))
+	if (info->next_index <= index)
 		info->next_index = index + 1;
 	return entry;
+
+out_page:
+	shmem_dir_free(page);
+out_block:
+	shmem_free_block(inode);
+out:
+	if (ub_val)
+		ub_shmpages_uncharge(shm_info_ub(info), ub_val);
+	return entry;
 }
 
 /*
@@ -435,13 +469,16 @@ static void shmem_truncate(struct inode 
 	swp_entry_t *ptr;
 	int offset;
 	int freed;
+	unsigned long swp_freed;
 
+	swp_freed = 0;
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 	idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (idx >= info->next_index)
 		return;
 
 	spin_lock(&info->lock);
+	ub_shmpages_uncharge(shm_info_ub(info), info->next_index - idx);
 	info->flags |= SHMEM_TRUNCATE;
 	limit = info->next_index;
 	info->next_index = idx;
@@ -450,7 +487,9 @@ static void shmem_truncate(struct inode 
 		size = limit;
 		if (size > SHMEM_NR_DIRECT)
 			size = SHMEM_NR_DIRECT;
-		info->swapped -= shmem_free_swp(ptr+idx, ptr+size);
+		freed = shmem_free_swp(ptr+idx, ptr+size);
+		swp_freed += freed;
+		info->swapped -= freed;
 	}
 	if (!info->i_indirect)
 		goto done2;
@@ -520,6 +559,7 @@ static void shmem_truncate(struct inode 
 			shmem_swp_unmap(ptr);
 			info->swapped -= freed;
 			subdir->nr_swapped -= freed;
+			swp_freed += freed;
 			BUG_ON(subdir->nr_swapped > offset);
 		}
 		if (offset)
@@ -556,7 +596,7 @@ done2:
 		spin_lock(&info->lock);
 	}
 	info->flags &= ~SHMEM_TRUNCATE;
-	shmem_recalc_inode(inode);
+	shmem_recalc_inode(inode, swp_freed);
 	spin_unlock(&info->lock);
 }
 
@@ -625,6 +665,8 @@ static void shmem_delete_inode(struct in
 		sbinfo->free_inodes++;
 		spin_unlock(&sbinfo->stat_lock);
 	}
+	put_beancounter(shm_info_ub(info));
+	shm_info_ub(info) = NULL;
 	clear_inode(inode);
 }
 
@@ -769,12 +811,11 @@ static int shmem_writepage(struct page *
 	info = SHMEM_I(inode);
 	if (info->flags & VM_LOCKED)
 		goto redirty;
-	swap = get_swap_page();
+	swap = get_swap_page(shm_info_ub(info));
 	if (!swap.val)
 		goto redirty;
 
 	spin_lock(&info->lock);
-	shmem_recalc_inode(inode);
 	if (index >= info->next_index) {
 		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
 		goto unlock;
@@ -913,7 +954,6 @@ repeat:
 		goto failed;
 
 	spin_lock(&info->lock);
-	shmem_recalc_inode(inode);
 	entry = shmem_swp_alloc(info, idx, sgp);
 	if (IS_ERR(entry)) {
 		spin_unlock(&info->lock);
@@ -1082,6 +1122,7 @@ repeat:
 		clear_highpage(filepage);
 		flush_dcache_page(filepage);
 		SetPageUptodate(filepage);
+		ub_tmpfs_respages_inc(shm_info_ub(info), 1);
 	}
 done:
 	if (*pagep != filepage) {
@@ -1179,28 +1220,6 @@ shmem_get_policy(struct vm_area_struct *
 }
 #endif
 
-int shmem_lock(struct file *file, int lock, struct user_struct *user)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct shmem_inode_info *info = SHMEM_I(inode);
-	int retval = -ENOMEM;
-
-	spin_lock(&info->lock);
-	if (lock && !(info->flags & VM_LOCKED)) {
-		if (!user_shm_lock(inode->i_size, user))
-			goto out_nomem;
-		info->flags |= VM_LOCKED;
-	}
-	if (!lock && (info->flags & VM_LOCKED) && user) {
-		user_shm_unlock(inode->i_size, user);
-		info->flags &= ~VM_LOCKED;
-	}
-	retval = 0;
-out_nomem:
-	spin_unlock(&info->lock);
-	return retval;
-}
-
 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	file_accessed(file);
@@ -1237,6 +1256,7 @@ shmem_get_inode(struct super_block *sb, 
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		info = SHMEM_I(inode);
 		memset(info, 0, (char *)inode - (char *)info);
+		shm_info_ub(info) = get_beancounter(get_exec_ub());
 		spin_lock_init(&info->lock);
 		INIT_LIST_HEAD(&info->swaplist);
 
@@ -1365,6 +1385,7 @@ shmem_file_write(struct file *file, cons
 			break;
 
 		left = bytes;
+#ifndef CONFIG_X86_UACCESS_INDIRECT
 		if (PageHighMem(page)) {
 			volatile unsigned char dummy;
 			__get_user(dummy, buf);
@@ -1375,6 +1396,7 @@ shmem_file_write(struct file *file, cons
 							buf, bytes);
 			kunmap_atomic(kaddr, KM_USER0);
 		}
+#endif
 		if (left) {
 			kaddr = kmap(page);
 			left = __copy_from_user(kaddr + offset, buf, bytes);
@@ -2137,20 +2159,42 @@ static struct xattr_handler *shmem_xattr
 
 #endif	/* CONFIG_TMPFS_XATTR */
 
+int is_shmem_mapping(struct address_space *map)
+{
+	return (map != NULL && map->a_ops == &shmem_aops);
+}
+
 static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
 	return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
 }
 
-static struct file_system_type tmpfs_fs_type = {
+struct file_system_type tmpfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "tmpfs",
 	.get_sb		= shmem_get_sb,
 	.kill_sb	= kill_litter_super,
 };
+
+EXPORT_SYMBOL(tmpfs_fs_type);
+
 static struct vfsmount *shm_mnt;
 
+#ifndef CONFIG_VE
+#define visible_shm_mnt shm_mnt
+#else
+#define visible_shm_mnt (get_exec_env()->shmem_mnt)
+#endif
+
+void prepare_shmmnt(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->shmem_mnt = shm_mnt;
+	shm_mnt = (struct vfsmount *)0x10111213;
+#endif
+}
+
 static int __init init_tmpfs(void)
 {
 	int error;
@@ -2167,13 +2211,14 @@ static int __init init_tmpfs(void)
 #ifdef CONFIG_TMPFS
 	devfs_mk_dir("shm");
 #endif
-	shm_mnt = do_kern_mount(tmpfs_fs_type.name, MS_NOUSER,
+	shm_mnt = do_kern_mount(&tmpfs_fs_type, MS_NOUSER,
 				tmpfs_fs_type.name, NULL);
 	if (IS_ERR(shm_mnt)) {
 		error = PTR_ERR(shm_mnt);
 		printk(KERN_ERR "Could not kern_mount tmpfs\n");
 		goto out1;
 	}
+	prepare_shmmnt();
 	return 0;
 
 out1:
@@ -2186,6 +2231,32 @@ out3:
 }
 module_init(init_tmpfs)
 
+static inline int shm_charge_ahead(struct inode *inode)
+{
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	unsigned long idx;
+	swp_entry_t *entry;
+
+	if (!inode->i_size)
+		return 0;
+	idx = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
+	/* 
+	 * Just touch info to allocate space for entry and
+	 * make all UBC checks 
+	 */
+	spin_lock(&info->lock);
+	entry = shmem_swp_alloc(info, idx, SGP_CACHE);
+	if (IS_ERR(entry))
+		goto err;
+	shmem_swp_unmap(entry);
+	spin_unlock(&info->lock);
+	return 0;
+
+err:
+	spin_unlock(&info->lock);
+	return PTR_ERR(entry);
+}
+
 /*
  * shmem_file_setup - get an unlinked file living in tmpfs
  *
@@ -2201,8 +2272,8 @@ struct file *shmem_file_setup(char *name
 	struct dentry *dentry, *root;
 	struct qstr this;
 
-	if (IS_ERR(shm_mnt))
-		return (void *)shm_mnt;
+	if (IS_ERR(visible_shm_mnt))
+		return (void *)visible_shm_mnt;
 
 	if (size < 0 || size > SHMEM_MAX_BYTES)
 		return ERR_PTR(-EINVAL);
@@ -2214,7 +2285,7 @@ struct file *shmem_file_setup(char *name
 	this.name = name;
 	this.len = strlen(name);
 	this.hash = 0; /* will go */
-	root = shm_mnt->mnt_root;
+	root = visible_shm_mnt->mnt_root;
 	dentry = d_alloc(root, &this);
 	if (!dentry)
 		goto put_memory;
@@ -2233,7 +2304,10 @@ struct file *shmem_file_setup(char *name
 	d_instantiate(dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0;	/* It is unlinked */
-	file->f_vfsmnt = mntget(shm_mnt);
+	error = shm_charge_ahead(inode);
+	if (error)
+		goto close_file;	
+	file->f_vfsmnt = mntget(visible_shm_mnt);
 	file->f_dentry = dentry;
 	file->f_mapping = inode->i_mapping;
 	file->f_op = &shmem_file_operations;
@@ -2248,6 +2322,7 @@ put_memory:
 	shmem_unacct_size(flags, size);
 	return ERR_PTR(error);
 }
+EXPORT_SYMBOL(shmem_file_setup);
 
 /*
  * shmem_zero_setup - setup a shared anonymous mapping
@@ -2265,6 +2340,8 @@ int shmem_zero_setup(struct vm_area_stru
 
 	if (vma->vm_file)
 		fput(vma->vm_file);
+	else if (vma->vm_flags & VM_WRITE) /* should match VM_UB_PRIVATE */
+		__ub_unused_privvm_dec(mm_ub(vma->vm_mm), size >> PAGE_SHIFT);
 	vma->vm_file = file;
 	vma->vm_ops = &shmem_vm_ops;
 	return 0;
diff -Nurap linux-2.6.9-100.orig/mm/slab.c linux-2.6.9-ve023stab054/mm/slab.c
--- linux-2.6.9-100.orig/mm/slab.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/slab.c	2011-06-15 19:26:22.000000000 +0400
@@ -92,33 +92,22 @@
 #include	<linux/sysctl.h>
 #include	<linux/module.h>
 #include	<linux/rcupdate.h>
+#include	<linux/kmem_slab.h>
+#include	<linux/kmem_cache.h>
+#include	<linux/kernel_stat.h>
+#include	<linux/ve_owner.h>
 
 #include	<asm/uaccess.h>
 #include	<asm/cacheflush.h>
 #include	<asm/tlbflush.h>
 #include	<asm/page.h>
 
-/*
- * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
- *		  SLAB_RED_ZONE & SLAB_POISON.
- *		  0 for faster, smaller code (especially in the critical paths).
- *
- * STATS	- 1 to collect stats for /proc/slabinfo.
- *		  0 for faster, smaller code (especially in the critical paths).
- *
- * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
- */
-
-#ifdef CONFIG_DEBUG_SLAB
-#define	DEBUG		1
-#define	STATS		1
-#define	FORCED_DEBUG	1
-#else
-#define	DEBUG		0
-#define	STATS		0
-#define	FORCED_DEBUG	0
-#endif
+#include	<ub/beancounter.h>
+#include	<ub/ub_mem.h>
 
+#define	DEBUG		SLAB_DEBUG
+#define	STATS		SLAB_STATS
+#define	FORCED_DEBUG	SLAB_FORCED_DEBUG
 
 /* Shouldn't this be in a header file somewhere? */
 #define	BYTES_PER_WORD		sizeof(void *)
@@ -142,202 +131,21 @@
 			 SLAB_NO_REAP | SLAB_CACHE_DMA | \
 			 SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
+			 SLAB_UBC | SLAB_NO_CHARGE | \
 			 SLAB_DESTROY_BY_RCU)
 #else
 # define CREATE_MASK	(SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
 			 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
+			 SLAB_UBC | SLAB_NO_CHARGE | \
 			 SLAB_DESTROY_BY_RCU)
 #endif
 
-/*
- * kmem_bufctl_t:
- *
- * Bufctl's are used for linking objs within a slab
- * linked offsets.
- *
- * This implementation relies on "struct page" for locating the cache &
- * slab an object belongs to.
- * This allows the bufctl structure to be small (one int), but limits
- * the number of objects a slab (not a cache) can contain when off-slab
- * bufctls are used. The limit is the size of the largest general cache
- * that does not use off-slab slabs.
- * For 32bit archs with 4 kB pages, is this 56.
- * This is not serious, as it is only for large objects, when it is unwise
- * to have too many per slab.
- * Note: This limit can be raised by introducing a general cache whose size
- * is less than 512 (PAGE_SIZE<<3), but greater than 256.
- */
-
-#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
-#define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-2)
-
 /* Max number of objs-per-slab for caches which use off-slab slabs.
  * Needed to avoid a possible looping condition in cache_grow().
  */
 static unsigned long offslab_limit;
 
-/*
- * struct slab
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
- */
-struct slab {
-	struct list_head	list;
-	unsigned long		colouroff;
-	void			*s_mem;		/* including colour offset */
-	unsigned int		inuse;		/* num of objs active in slab */
-	kmem_bufctl_t		free;
-};
-
-/*
- * struct slab_rcu
- *
- * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
- * arrange for kmem_freepages to be called via RCU.  This is useful if
- * we need to approach a kernel structure obliquely, from its address
- * obtained without the usual locking.  We can lock the structure to
- * stabilize it and check it's still at the given address, only if we
- * can be sure that the memory has not been meanwhile reused for some
- * other kind of object (which our subsystem's lock might corrupt).
- *
- * rcu_read_lock before reading the address, then rcu_read_unlock after
- * taking the spinlock within the structure expected at that address.
- *
- * We assume struct slab_rcu can overlay struct slab when destroying.
- */
-struct slab_rcu {
-	struct rcu_head		head;
-	kmem_cache_t		*cachep;
-	void			*addr;
-};
-
-/*
- * struct array_cache
- *
- * Per cpu structures
- * Purpose:
- * - LIFO ordering, to hand out cache-warm objects from _alloc
- * - reduce the number of linked list operations
- * - reduce spinlock operations
- *
- * The limit is stored in the per-cpu structure to reduce the data cache
- * footprint.
- *
- */
-struct array_cache {
-	unsigned int avail;
-	unsigned int limit;
-	unsigned int batchcount;
-	unsigned int touched;
-};
-
-/* bootstrap: The caches do not work without cpuarrays anymore,
- * but the cpuarrays are allocated from the generic caches...
- */
-#define BOOT_CPUCACHE_ENTRIES	1
-struct arraycache_init {
-	struct array_cache cache;
-	void * entries[BOOT_CPUCACHE_ENTRIES];
-};
-
-/*
- * The slab lists of all objects.
- * Hopefully reduce the internal fragmentation
- * NUMA: The spinlock could be moved from the kmem_cache_t
- * into this structure, too. Figure out what causes
- * fewer cross-node spinlock operations.
- */
-struct kmem_list3 {
-	struct list_head	slabs_partial;	/* partial list first, better asm code */
-	struct list_head	slabs_full;
-	struct list_head	slabs_free;
-	unsigned long	free_objects;
-	int		free_touched;
-	unsigned long	next_reap;
-	struct array_cache	*shared;
-};
-
-#define LIST3_INIT(parent) \
-	{ \
-		.slabs_full	= LIST_HEAD_INIT(parent.slabs_full), \
-		.slabs_partial	= LIST_HEAD_INIT(parent.slabs_partial), \
-		.slabs_free	= LIST_HEAD_INIT(parent.slabs_free) \
-	}
-#define list3_data(cachep) \
-	(&(cachep)->lists)
-
-/* NUMA: per-node */
-#define list3_data_ptr(cachep, ptr) \
-		list3_data(cachep)
-
-/*
- * kmem_cache_t
- *
- * manages a cache.
- */
-	
-struct kmem_cache_s {
-/* 1) per-cpu data, touched during every alloc/free */
-	struct array_cache	*array[NR_CPUS];
-	unsigned int		batchcount;
-	unsigned int		limit;
-/* 2) touched by every alloc & free from the backend */
-	struct kmem_list3	lists;
-	/* NUMA: kmem_3list_t	*nodelists[MAX_NUMNODES] */
-	unsigned int		objsize;
-	unsigned int	 	flags;	/* constant flags */
-	unsigned int		num;	/* # of objs per slab */
-	unsigned int		free_limit; /* upper limit of objects in the lists */
-	spinlock_t		spinlock;
-
-/* 3) cache_grow/shrink */
-	/* order of pgs per slab (2^n) */
-	unsigned int		gfporder;
-
-	/* force GFP flags, e.g. GFP_DMA */
-	unsigned int		gfpflags;
-
-	size_t			colour;		/* cache colouring range */
-	unsigned int		colour_off;	/* colour offset */
-	unsigned int		colour_next;	/* cache colouring */
-	kmem_cache_t		*slabp_cache;
-	unsigned int		slab_size;
-	unsigned int		dflags;		/* dynamic flags */
-
-	/* constructor func */
-	void (*ctor)(void *, kmem_cache_t *, unsigned long);
-
-	/* de-constructor func */
-	void (*dtor)(void *, kmem_cache_t *, unsigned long);
-
-/* 4) cache creation/removal */
-	const char		*name;
-	struct list_head	next;
-
-/* 5) statistics */
-#if STATS
-	unsigned long		num_active;
-	unsigned long		num_allocations;
-	unsigned long		high_mark;
-	unsigned long		grown;
-	unsigned long		reaped;
-	unsigned long 		errors;
-	unsigned long		max_freeable;
-	atomic_t		allochit;
-	atomic_t		allocmiss;
-	atomic_t		freehit;
-	atomic_t		freemiss;
-#endif
-#if DEBUG
-	int			dbghead;
-	int			reallen;
-#endif
-};
-
 #define CFLGS_OFF_SLAB		(0x80000000UL)
 #define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
 
@@ -351,12 +159,13 @@ struct kmem_cache_s {
 #define REAPTIMEOUT_CPUC	(2*HZ)
 #define REAPTIMEOUT_LIST3	(4*HZ)
 
+#define	STATS_INC_GROWN(x)	((x)->grown++)
+#define	STATS_INC_REAPED(x)	((x)->reaped++)
+#define	STATS_INC_SHRUNK(x)	((x)->shrunk++)
 #if STATS
 #define	STATS_INC_ACTIVE(x)	((x)->num_active++)
 #define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
 #define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
-#define	STATS_INC_GROWN(x)	((x)->grown++)
-#define	STATS_INC_REAPED(x)	((x)->reaped++)
 #define	STATS_SET_HIGH(x)	do { if ((x)->num_active > (x)->high_mark) \
 					(x)->high_mark = (x)->num_active; \
 				} while (0)
@@ -374,8 +183,6 @@ struct kmem_cache_s {
 #define	STATS_INC_ACTIVE(x)	do { } while (0)
 #define	STATS_DEC_ACTIVE(x)	do { } while (0)
 #define	STATS_INC_ALLOCED(x)	do { } while (0)
-#define	STATS_INC_GROWN(x)	do { } while (0)
-#define	STATS_INC_REAPED(x)	do { } while (0)
 #define	STATS_SET_HIGH(x)	do { } while (0)
 #define	STATS_INC_ERR(x)	do { } while (0)
 #define	STATS_SET_FREEABLE(x, i) \
@@ -472,19 +279,12 @@ static void **dbg_userword(kmem_cache_t 
 #define	BREAK_GFP_ORDER_LO	0
 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
 
-/* Macros for storing/retrieving the cachep and or slab from the
- * global 'mem_map'. These are used to find the slab an obj belongs to.
- * With kfree(), these are used to find the cache which an obj belongs to.
- */
-#define	SET_PAGE_CACHE(pg,x)  ((pg)->lru.next = (struct list_head *)(x))
-#define	GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->lru.next)
-#define	SET_PAGE_SLAB(pg,x)   ((pg)->lru.prev = (struct list_head *)(x))
-#define	GET_PAGE_SLAB(pg)     ((struct slab *)(pg)->lru.prev)
-
 /* These are the default caches for kmalloc. Custom caches can have other sizes. */
 struct cache_sizes malloc_sizes[] = {
 #define CACHE(x) { .cs_size = (x) },
 #include <linux/kmalloc_sizes.h>
+	{ 0, },
+#include <linux/kmalloc_sizes.h>
 	{ 0, }
 #undef CACHE
 };
@@ -500,10 +300,17 @@ struct cache_names {
 static struct cache_names __initdata cache_names[] = {
 #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
 #include <linux/kmalloc_sizes.h>
+	{ NULL, },
+#undef CACHE
+#define CACHE(x) { .name = "size-" #x "(UBC)", .name_dma = "size-" #x "(DMA,UBC)" },
+#include <linux/kmalloc_sizes.h>
 	{ NULL, }
 #undef CACHE
 };
 
+int malloc_cache_num;
+EXPORT_SYMBOL(malloc_cache_num);
+
 static struct arraycache_init initarray_cache __initdata =
 	{ { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 static struct arraycache_init initarray_generic =
@@ -525,6 +332,7 @@ static kmem_cache_t cache_cache = {
 
 /* Guard access to the cache-chain. */
 static struct semaphore	cache_chain_sem;
+static spinlock_t cache_chain_lock;
 static struct list_head cache_chain;
 
 /*
@@ -566,6 +374,8 @@ static kmem_cache_t * kmem_find_general_
 {
 	struct cache_sizes *csizep = malloc_sizes;
 
+	if (gfpflags & __GFP_UBC)
+		csizep += malloc_cache_num;
 	/* This function could be moved to the header file, and
 	 * made inline so consumers can quickly determine what
 	 * cache pointer they require.
@@ -586,13 +396,24 @@ static void cache_estimate (unsigned lon
 	size_t wastage = PAGE_SIZE<<gfporder;
 	size_t extra = 0;
 	size_t base = 0;
+	size_t ub_align, ub_extra;
+
+	ub_align = 1;
+	ub_extra = 0;
 
 	if (!(flags & CFLGS_OFF_SLAB)) {
 		base = sizeof(struct slab);
 		extra = sizeof(kmem_bufctl_t);
+#ifdef CONFIG_USER_RESOURCE
+		if (flags & SLAB_UBC) {
+			ub_extra = sizeof(void *);
+			ub_align = sizeof(void *);
+		}
+#endif
 	}
 	i = 0;
-	while (i*size + ALIGN(base+i*extra, align) <= wastage)
+	while (i * size + ALIGN(ALIGN(base + i * extra, ub_align) +
+				i * ub_extra, align) <= wastage)
 		i++;
 	if (i > 0)
 		i--;
@@ -601,8 +422,8 @@ static void cache_estimate (unsigned lon
 		i = SLAB_LIMIT;
 
 	*num = i;
-	wastage -= i*size;
-	wastage -= ALIGN(base+i*extra, align);
+	wastage -= i * size + ALIGN(ALIGN(base + i * extra, ub_align) +
+			i * ub_extra, align);
 	*left_over = wastage;
 }
 
@@ -724,6 +545,7 @@ void __init kmem_cache_init(void)
 	size_t left_over;
 	struct cache_sizes *sizes;
 	struct cache_names *names;
+	int i;
 
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
@@ -751,6 +573,7 @@ void __init kmem_cache_init(void)
 
 	/* 1) create the cache_cache */
 	init_MUTEX(&cache_chain_sem);
+	spin_lock_init(&cache_chain_lock);
 	INIT_LIST_HEAD(&cache_chain);
 	list_add(&cache_cache.next, &cache_chain);
 	cache_cache.colour_off = cache_line_size();
@@ -771,7 +594,7 @@ void __init kmem_cache_init(void)
 	/* 2+3) create the kmalloc caches */
 	sizes = malloc_sizes;
 	names = cache_names;
-
+	for (i = 0; i < 2; i++) {
 	while (sizes->cs_size) {
 		/* For performance, all the general caches are L1 aligned.
 		 * This should be particularly beneficial on SMP boxes, as it
@@ -780,22 +603,28 @@ void __init kmem_cache_init(void)
 		 * allow tighter packing of the smaller caches. */
 		sizes->cs_cachep = kmem_cache_create(names->name,
 			sizes->cs_size, ARCH_KMALLOC_MINALIGN,
-			(ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
+			(ARCH_KMALLOC_FLAGS | SLAB_PANIC |
+			 (i ? SLAB_UBC : 0) | SLAB_NO_CHARGE),
+			NULL, NULL);
 
 		/* Inc off-slab bufctl limit until the ceiling is hit. */
-		if (!(OFF_SLAB(sizes->cs_cachep))) {
-			offslab_limit = sizes->cs_size-sizeof(struct slab);
-			offslab_limit /= sizeof(kmem_bufctl_t);
-		}
+		if (!(OFF_SLAB(sizes->cs_cachep)))
+			offslab_limit = sizes->cs_size;
 
 		sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
 			sizes->cs_size, ARCH_KMALLOC_MINALIGN,
-			(ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC),
+			(ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC |
+			 (i ? SLAB_UBC : 0) | SLAB_NO_CHARGE),
 			NULL, NULL);
 
 		sizes++;
 		names++;
 	}
+	sizes++;
+	names++;
+	if (!i)
+		malloc_cache_num = sizes - malloc_sizes;
+	}
 	/* 4) Replace the bootstrap head arrays */
 	{
 		void * ptr;
@@ -1169,7 +998,7 @@ kmem_cache_create (const char *name, siz
 	unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
 	void (*dtor)(void*, kmem_cache_t *, unsigned long))
 {
-	size_t left_over, slab_size;
+	size_t left_over, slab_size, ub_size, ub_align;
 	kmem_cache_t *cachep = NULL;
 
 	/*
@@ -1309,6 +1138,7 @@ kmem_cache_create (const char *name, siz
 		 */
 		do {
 			unsigned int break_flag = 0;
+			unsigned long off_slab_size;
 cal_wastage:
 			cache_estimate(cachep->gfporder, size, align, flags,
 						&left_over, &cachep->num);
@@ -1318,12 +1148,22 @@ cal_wastage:
 				break;
 			if (!cachep->num)
 				goto next;
-			if (flags & CFLGS_OFF_SLAB &&
-					cachep->num > offslab_limit) {
+			if (flags & CFLGS_OFF_SLAB) {
+				off_slab_size = sizeof(struct slab) +
+					cachep->num * sizeof(kmem_bufctl_t);
+#ifdef CONFIG_USER_RESOURCE
+				if (flags & SLAB_UBC)
+					off_slab_size = ALIGN(off_slab_size,
+							sizeof(void *)) +
+						cachep->num * sizeof(void *);
+#endif
+
 				/* This num of objs will cause problems. */
-				cachep->gfporder--;
-				break_flag++;
-				goto cal_wastage;
+				if (off_slab_size > offslab_limit) {
+					cachep->gfporder--;
+					break_flag++;
+					goto cal_wastage;
+				}
 			}
 
 			/*
@@ -1346,8 +1186,19 @@ next:
 		cachep = NULL;
 		goto opps;
 	}
-	slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t)
-				+ sizeof(struct slab), align);
+
+	ub_size = 0;
+	ub_align = 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (flags & SLAB_UBC) {
+		ub_size = sizeof(void *);
+		ub_align = sizeof(void *);
+	}
+#endif
+
+	slab_size = ALIGN(ALIGN(cachep->num * sizeof(kmem_bufctl_t) +
+			sizeof(struct slab), ub_align) +
+			cachep->num * ub_size, align);
 
 	/*
 	 * If the slab has been placed off-slab, and we have enough space then
@@ -1360,7 +1211,9 @@ next:
 
 	if (flags & CFLGS_OFF_SLAB) {
 		/* really off slab. No need for manual alignment */
-		slab_size = cachep->num*sizeof(kmem_bufctl_t)+sizeof(struct slab);
+		slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) +
+			sizeof(struct slab), ub_align) +
+			cachep->num * ub_size;
 	}
 
 	cachep->colour_off = cache_line_size();
@@ -1457,9 +1310,21 @@ next:
 	}
 
 	/* cache setup completed, link it into the list */
+	spin_lock(&cache_chain_lock);
 	list_add(&cachep->next, &cache_chain);
+	spin_unlock(&cache_chain_lock);
 	up(&cache_chain_sem);
 	unlock_cpu_hotplug();
+
+#ifdef CONFIG_USER_RESOURCE
+	cachep->objuse = ((PAGE_SIZE << cachep->gfporder) + cachep->num - 1) /
+		cachep->num;
+	if (OFF_SLAB(cachep))
+		cachep->objuse +=
+			(cachep->slabp_cache->objuse + cachep->num - 1)
+			/ cachep->num;
+#endif
+
 opps:
 	if (!cachep && (flags & SLAB_PANIC))
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
@@ -1561,6 +1426,7 @@ static int __cache_shrink(kmem_cache_t *
 			BUG();
 #endif
 		list_del(&slabp->list);
+		STATS_INC_SHRUNK(cachep);
 
 		cachep->lists.free_objects -= cachep->num;
 		spin_unlock_irq(&cachep->spinlock);
@@ -1622,13 +1488,17 @@ int kmem_cache_destroy (kmem_cache_t * c
 	/*
 	 * the chain is never empty, cache_cache is never destroyed
 	 */
+	spin_lock(&cache_chain_lock);
 	list_del(&cachep->next);
+	spin_unlock(&cache_chain_lock);
 	up(&cache_chain_sem);
 
 	if (__cache_shrink(cachep)) {
 		slab_error(cachep, "Can't free all objects");
 		down(&cache_chain_sem);
+		spin_lock(&cache_chain_lock);
 		list_add(&cachep->next,&cache_chain);
+		spin_unlock(&cache_chain_lock);
 		up(&cache_chain_sem);
 		unlock_cpu_hotplug();
 		return 1;
@@ -1646,6 +1516,7 @@ int kmem_cache_destroy (kmem_cache_t * c
 	/* NUMA: free the list3 structures */
 	kfree(cachep->lists.shared);
 	cachep->lists.shared = NULL;
+	ub_kmemcache_free(cachep);
 	kmem_cache_free(&cache_cache, cachep);
 
 	unlock_cpu_hotplug();
@@ -1660,28 +1531,30 @@ static struct slab* alloc_slabmgmt (kmem
 			void *objp, int colour_off, int local_flags)
 {
 	struct slab *slabp;
-	
+
 	if (OFF_SLAB(cachep)) {
 		/* Slab management obj is off-slab. */
-		slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+		slabp = kmem_cache_alloc(cachep->slabp_cache,
+				local_flags & (~__GFP_UBC));
 		if (!slabp)
 			return NULL;
 	} else {
 		slabp = objp+colour_off;
 		colour_off += cachep->slab_size;
 	}
+
 	slabp->inuse = 0;
 	slabp->colouroff = colour_off;
 	slabp->s_mem = objp+colour_off;
 
+#ifdef CONFIG_USER_RESOURCE
+	if (cachep->flags & SLAB_UBC)
+		memset(slab_ubcs(cachep, slabp), 0, cachep->num *
+				sizeof(struct user_beancounter *));
+#endif
 	return slabp;
 }
 
-static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
-{
-	return (kmem_bufctl_t *)(slabp+1);
-}
-
 static void cache_init_objs (kmem_cache_t * cachep,
 			struct slab * slabp, unsigned long ctor_flags)
 {
@@ -1809,7 +1682,7 @@ static int cache_grow (kmem_cache_t * ca
 
 
 	/* Get mem for the objs. */
-	if (!(objp = kmem_getpages(cachep, flags, -1)))
+	if (!(objp = kmem_getpages(cachep, flags & (~__GFP_UBC), -1)))
 		goto failed;
 
 	/* Get slab management. */
@@ -2112,6 +1985,16 @@ cache_alloc_debugcheck_after(kmem_cache_
 #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
 #endif
 
+static inline int should_charge(kmem_cache_t *cachep, int flags, void *objp)
+{
+	if (objp == NULL)
+		return 0;
+	if (!(cachep->flags & SLAB_UBC))
+		return 0;
+	if ((cachep->flags & SLAB_NO_CHARGE) && !(flags & __GFP_UBC))
+		return 0;
+	return 1;
+}
 
 static inline void * __cache_alloc (kmem_cache_t *cachep, int flags)
 {
@@ -2131,9 +2014,19 @@ static inline void * __cache_alloc (kmem
 		STATS_INC_ALLOCMISS(cachep);
 		objp = cache_alloc_refill(cachep, flags);
 	}
+	if (should_charge(cachep, flags, objp) &&
+			ub_slab_charge(objp, flags) < 0)
+		goto out_err;
 	local_irq_restore(save_flags);
+
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp, __builtin_return_address(0));
 	return objp;
+
+out_err:
+	local_irq_restore(save_flags);
+	objp = cache_alloc_debugcheck_after(cachep, flags, objp, __builtin_return_address(0));
+	kmem_cache_free(cachep, objp);
+	return NULL;
 }
 
 /* 
@@ -2175,6 +2068,7 @@ static void free_block(kmem_cache_t *cac
 		/* fixup slab chains */
 		if (slabp->inuse == 0) {
 			if (cachep->lists.free_objects > cachep->free_limit) {
+				STATS_INC_SHRUNK(cachep);
 				cachep->lists.free_objects -= cachep->num;
 				slab_destroy(cachep, slabp);
 			} else {
@@ -2192,6 +2086,17 @@ static void free_block(kmem_cache_t *cac
 	}
 }
 
+void kmem_cache_free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
+{
+	unsigned long flags;
+
+	if (!nr_objects)
+		return;
+	spin_lock_irqsave(&cachep->spinlock, flags);
+	free_block(cachep, objpp, nr_objects);
+	spin_unlock_irqrestore(&cachep->spinlock, flags);
+}
+
 static void cache_flusharray (kmem_cache_t* cachep, struct array_cache *ac)
 {
 	int batchcount;
@@ -2256,6 +2161,9 @@ static inline void __cache_free (kmem_ca
 	check_irq_off();
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
+	if (cachep->flags & SLAB_UBC)
+		ub_slab_uncharge(objp);
+
 	if (likely(ac->avail < ac->limit)) {
 		STATS_INC_FREEHIT(cachep);
 		ac_entry(ac)[ac->avail++] = objp;
@@ -2424,6 +2332,8 @@ void * __kmalloc (size_t size, int flags
 {
 	struct cache_sizes *csizep = malloc_sizes;
 
+	if (flags & __GFP_UBC)
+		csizep += malloc_cache_num;
 	for (; csizep->cs_size; csizep++) {
 		if (size > csizep->cs_size)
 			continue;
@@ -2762,6 +2672,7 @@ static void cache_reap(void *unused)
 		return;
 	}
 
+	{KSTAT_PERF_ENTER(cache_reap)
 	list_for_each(walk, &cache_chain) {
 		kmem_cache_t *searchp;
 		struct list_head* p;
@@ -2820,10 +2731,68 @@ next:
 	}
 	check_irq_on();
 	up(&cache_chain_sem);
+	KSTAT_PERF_LEAVE(cache_reap)}
 	/* Setup the next iteration */
 	schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC + smp_processor_id());
 }
 
+#define SHOW_TOP_SLABS	7
+
+static unsigned long get_cache_size(kmem_cache_t *cachep)
+{
+	unsigned long flags;
+	unsigned long slabs;
+	struct slab *slabp;
+
+	slabs = 0;
+
+	spin_lock_irqsave(&cachep->spinlock, flags);
+	list_for_each_entry(slabp, &cachep->lists.slabs_full, list)
+		slabs++;
+	list_for_each_entry(slabp, &cachep->lists.slabs_partial, list)
+		slabs++;
+	list_for_each_entry(slabp, &cachep->lists.slabs_free, list)
+		slabs++;
+	spin_unlock_irqrestore(&cachep->spinlock, flags);
+
+	return slabs * (PAGE_SIZE << cachep->gfporder) +
+		(OFF_SLAB(cachep) ? cachep->slabp_cache->objsize * slabs : 0);
+}
+
+void show_slab_info(void)
+{
+	int i, j;
+	unsigned long size;
+	kmem_cache_t *ptr;
+	unsigned long sizes[SHOW_TOP_SLABS];
+	kmem_cache_t *top[SHOW_TOP_SLABS];
+
+	memset(top, 0, sizeof(top));
+	memset(sizes, 0, sizeof(sizes));
+
+	printk("Top %d caches:\n", SHOW_TOP_SLABS);
+	spin_lock(&cache_chain_lock);
+	list_for_each_entry(ptr, &cache_chain, next) {
+		size = get_cache_size(ptr);
+
+		j = 0;
+		for (i = 1; i < SHOW_TOP_SLABS; i++)
+			if (sizes[i] < sizes[j])
+				j = i;
+
+		if (size > sizes[j]) {
+			sizes[j] = size;
+			top[j] = ptr;
+		}
+	}
+
+	for (i = 0; i < SHOW_TOP_SLABS; i++)
+		if (top[i])
+			printk(" %-21s: size %10lu objsize %10u\n", top[i]->name,
+					sizes[i], top[i]->objsize);
+	spin_unlock(&cache_chain_lock);
+}
+
 #ifdef CONFIG_PROC_FS
 
 static void *s_start(struct seq_file *m, loff_t *pos)
@@ -2846,7 +2815,7 @@ static void *s_start(struct seq_file *m,
 		seq_puts(m, " : tunables <batchcount> <limit> <sharedfactor>");
 		seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
 #if STATS
-		seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <freelimit>");
+		seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <freelimit> <shrunk>");
 		seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
 #endif
 		seq_putc(m, '\n');
@@ -2920,7 +2889,7 @@ static int s_show(struct seq_file *m, vo
 	if (error)
 		printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
 
-	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
+	seq_printf(m, "%-21s %6lu %6lu %6u %4u %4d",
 		name, active_objs, num_objs, cachep->objsize,
 		cachep->num, (1<<cachep->gfporder));
 	seq_printf(m, " : tunables %4u %4u %4u",
@@ -2934,13 +2903,14 @@ static int s_show(struct seq_file *m, vo
 		unsigned long allocs = cachep->num_allocations;
 		unsigned long grown = cachep->grown;
 		unsigned long reaped = cachep->reaped;
+		unsigned long shrunk = cachep->shrunk;
 		unsigned long errors = cachep->errors;
 		unsigned long max_freeable = cachep->max_freeable;
 		unsigned long free_limit = cachep->free_limit;
 
-		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu %4lu %4lu %4lu",
+		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu %4lu %4lu %4lu %5lu",
 				allocs, high, grown, reaped, errors, 
-				max_freeable, free_limit);
+				max_freeable, free_limit, shrunk);
 	}
 	/* cpu stats */
 	{
diff -Nurap linux-2.6.9-100.orig/mm/swap.c linux-2.6.9-ve023stab054/mm/swap.c
--- linux-2.6.9-100.orig/mm/swap.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/swap.c	2011-06-15 19:26:21.000000000 +0400
@@ -258,6 +258,7 @@ void fastcall lru_cache_add_active(struc
 		__pagevec_lru_add_active(pvec);
 	put_cpu_var(lru_add_active_pvecs);
 }
+EXPORT_SYMBOL(lru_cache_add_active);
 
 void lru_add_drain(void)
 {
@@ -276,6 +277,7 @@ void lru_add_drain(void)
 		__pagevec_deactivate(pvec);
 	put_cpu_var(lru_add_pvecs);
 }
+EXPORT_SYMBOL(lru_add_drain);
 
 /*
  * This path almost never happens for VM activity - pages are normally
diff -Nurap linux-2.6.9-100.orig/mm/swap_state.c linux-2.6.9-ve023stab054/mm/swap_state.c
--- linux-2.6.9-100.orig/mm/swap_state.c	2004-10-19 01:54:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/swap_state.c	2011-06-15 19:26:21.000000000 +0400
@@ -14,9 +14,15 @@
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/backing-dev.h>
+#include <linux/kernel_stat.h>
 
 #include <asm/pgtable.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/ub_page.h>
+#include <ub/ub_vmpages.h>
+
 /*
  * swapper_space is a fiction, retained to simplify the path through
  * vmscan's shrink_list, to make sync_page look nicer, and to allow
@@ -42,23 +48,20 @@ struct address_space swapper_space = {
 };
 EXPORT_SYMBOL(swapper_space);
 
+/* can't remove variable swap_cache_info due to dynamic kernel */
 #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
 
-static struct {
-	unsigned long add_total;
-	unsigned long del_total;
-	unsigned long find_success;
-	unsigned long find_total;
-	unsigned long noent_race;
-	unsigned long exist_race;
-} swap_cache_info;
+struct swap_cache_info_struct swap_cache_info;
+EXPORT_SYMBOL(swap_cache_info);
 
 void show_swap_cache_info(void)
 {
-	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
+	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, "
+		"race %lu+%lu+%lu\n",
 		swap_cache_info.add_total, swap_cache_info.del_total,
 		swap_cache_info.find_success, swap_cache_info.find_total,
-		swap_cache_info.noent_race, swap_cache_info.exist_race);
+		swap_cache_info.noent_race, swap_cache_info.exist_race,
+		swap_cache_info.remove_race);
 }
 
 /*
@@ -90,8 +93,9 @@ static int __add_to_swap_cache(struct pa
 	}
 	return error;
 }
+EXPORT_SYMBOL(__add_to_swap_cache);
 
-static int add_to_swap_cache(struct page *page, swp_entry_t entry)
+int add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;
 
@@ -112,6 +116,7 @@ static int add_to_swap_cache(struct page
 	INC_CACHE_INFO(add_total);
 	return 0;
 }
+EXPORT_SYMBOL(add_to_swap_cache);
 
 /*
  * This must be called only on pages that have
@@ -148,7 +153,14 @@ int add_to_swap(struct page * page)
 		BUG();
 
 	for (;;) {
-		entry = get_swap_page();
+		struct user_beancounter *ub;
+
+		ub = pb_grab_page_ub(page);
+		if (IS_ERR(ub))
+			return 0;
+
+		entry = get_swap_page(ub);
+		put_beancounter(ub);
 		if (!entry.val)
 			return 0;
 
@@ -194,6 +206,7 @@ int add_to_swap(struct page * page)
 		}
 	}
 }
+EXPORT_SYMBOL(add_to_swap);
 
 /*
  * This must be called only on pages that have
@@ -264,10 +277,13 @@ int move_from_swap_cache(struct page *pa
  */
 static inline void free_swap_cache(struct page *page)
 {
-	if (PageSwapCache(page) && !TestSetPageLocked(page)) {
+	if (!PageSwapCache(page))
+		return;
+	if (!TestSetPageLocked(page)) {
 		remove_exclusive_swap_page(page);
 		unlock_page(page);
-	}
+	} else
+		INC_CACHE_INFO(remove_race);
 }
 
 /* 
@@ -323,6 +339,7 @@ struct page * lookup_swap_cache(swp_entr
 	INC_CACHE_INFO(find_total);
 	return page;
 }
+EXPORT_SYMBOL(lookup_swap_cache);
 
 /* 
  * Locate a page of swap in physical memory, reserving swap cache space
@@ -385,3 +402,4 @@ struct page *read_swap_cache_async(swp_e
 		page_cache_release(new_page);
 	return found_page;
 }
+EXPORT_SYMBOL(read_swap_cache_async);
diff -Nurap linux-2.6.9-100.orig/mm/swapfile.c linux-2.6.9-ve023stab054/mm/swapfile.c
--- linux-2.6.9-100.orig/mm/swapfile.c	2004-10-19 01:53:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/swapfile.c	2011-06-15 19:26:22.000000000 +0400
@@ -30,11 +30,14 @@
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
 
+#include <ub/ub_vmpages.h>
+
 spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
 unsigned int nr_swapfiles;
 long total_swap_pages;
 static int swap_overflow;
 
+EXPORT_SYMBOL(swaplock);
 EXPORT_SYMBOL(total_swap_pages);
 
 static const char Bad_file[] = "Bad swap file entry ";
@@ -48,6 +51,9 @@ struct swap_info_struct swap_info[MAX_SW
 
 static DECLARE_MUTEX(swapon_sem);
 
+EXPORT_SYMBOL(swap_list);
+EXPORT_SYMBOL(swap_info);
+
 /*
  * We need this because the bdev->unplug_fn can sleep and we cannot
  * hold swap_list_lock while calling the unplug_fn. And swap_list_lock
@@ -147,7 +153,7 @@ static inline int scan_swap_map(struct s
 	return 0;
 }
 
-swp_entry_t get_swap_page(void)
+swp_entry_t get_swap_page(struct user_beancounter *ub)
 {
 	struct swap_info_struct * p;
 	unsigned long offset;
@@ -164,7 +170,7 @@ swp_entry_t get_swap_page(void)
 
 	while (1) {
 		p = &swap_info[type];
-		if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) {
+		if ((p->flags & (SWP_ACTIVE|SWP_READONLY)) == SWP_ACTIVE) {
 			swap_device_lock(p);
 			offset = scan_swap_map(p);
 			swap_device_unlock(p);
@@ -177,6 +183,12 @@ swp_entry_t get_swap_page(void)
 				} else {
 					swap_list.next = type;
 				}
+#if CONFIG_USER_SWAP_ACCOUNTING
+				if (p->owner_map[offset] != NULL)
+					BUG();
+				ub_swapentry_inc(ub);
+				p->owner_map[offset] = get_beancounter(ub);
+#endif
 				goto out;
 			}
 		}
@@ -194,6 +206,7 @@ out:
 	swap_list_unlock();
 	return entry;
 }
+EXPORT_SYMBOL(get_swap_page);
 
 static struct swap_info_struct * swap_info_get(swp_entry_t entry)
 {
@@ -248,6 +261,11 @@ static int swap_entry_free(struct swap_i
 		count--;
 		p->swap_map[offset] = count;
 		if (!count) {
+#if CONFIG_USER_SWAP_ACCOUNTING
+			ub_swapentry_dec(p->owner_map[offset]);
+			put_beancounter(p->owner_map[offset]);
+			p->owner_map[offset] = NULL;
+#endif
 			if (offset < p->lowest_bit)
 				p->lowest_bit = offset;
 			if (offset > p->highest_bit)
@@ -273,6 +291,7 @@ void swap_free(swp_entry_t entry)
 		swap_info_put(p);
 	}
 }
+EXPORT_SYMBOL(swap_free);
 
 /*
  * Check if we're the only user of a swap page,
@@ -288,7 +307,8 @@ static int exclusive_swap_page(struct pa
 	p = swap_info_get(entry);
 	if (p) {
 		/* Is the only swap cache user the cache itself? */
-		if (p->swap_map[swp_offset(entry)] == 1) {
+		if ((p->flags & (SWP_ACTIVE|SWP_READONLY)) == SWP_ACTIVE &&
+		    p->swap_map[swp_offset(entry)] == 1) {
 			/* Recheck the page count with the swapcache lock held.. */
 			spin_lock_irq(&swapper_space.tree_lock);
 			if (page_count(page) == 2)
@@ -379,6 +399,54 @@ int remove_exclusive_swap_page(struct pa
 	return retval;
 }
 
+int try_to_remove_exclusive_swap_page(struct page *page)
+{
+	int retval;
+	struct swap_info_struct * p;
+	swp_entry_t entry;
+
+	BUG_ON(PagePrivate(page));
+	BUG_ON(!PageLocked(page));
+
+	if (!PageSwapCache(page))
+		return 0;
+	if (PageWriteback(page))
+		return 0;
+	if (page_count(page) != 2) /* 2: us + cache */
+		return 0;
+
+	entry.val = page->private;
+	p = swap_info_get(entry);
+	if (!p)
+		return 0;
+	if (!vm_swap_full() &&
+	    (p->flags & (SWP_ACTIVE|SWP_READONLY)) == SWP_ACTIVE) {
+		swap_info_put(p);
+		return 0;
+	}
+
+	/* Is the only swap cache user the cache itself? */
+	retval = 0;
+	if (p->swap_map[swp_offset(entry)] == 1) {
+		/* Recheck the page count with the swapcache lock held.. */
+		spin_lock_irq(&swapper_space.tree_lock);
+		if ((page_count(page) == 2) && !PageWriteback(page)) {
+			__delete_from_swap_cache(page);
+			SetPageDirty(page);
+			retval = 1;
+		}
+		spin_unlock_irq(&swapper_space.tree_lock);
+	}
+	swap_info_put(p);
+
+	if (retval) {
+		swap_free(entry);
+		page_cache_release(page);
+	}
+
+	return retval;
+}
+
 /*
  * Free the swap entry like above, but also try to
  * free the page cache entry if it is the last user.
@@ -415,6 +483,7 @@ void free_swap_and_cache(swp_entry_t ent
 		page_cache_release(page);
 	}
 }
+EXPORT_SYMBOL(free_swap_and_cache);
 
 /*
  * The swap entry has been read in advance, and we return 1 to indicate
@@ -428,9 +497,11 @@ void free_swap_and_cache(swp_entry_t ent
 /* vma->vm_mm->page_table_lock is held */
 static void
 unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
-	swp_entry_t entry, struct page *page)
+	swp_entry_t entry, struct page *page, struct page_beancounter **ppbs)
 {
 	vma->vm_mm->rss++;
+	ub_unused_privvm_dec(mm_ub(vma->vm_mm), 1, vma);
+	pb_add_list_ref(page, NULL, mm_ub(vma->vm_mm), ppbs);
 	get_page(page);
 	set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, address);
@@ -440,7 +511,7 @@ unuse_pte(struct vm_area_struct *vma, un
 /* vma->vm_mm->page_table_lock is held */
 static unsigned long unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
 	unsigned long address, unsigned long size, unsigned long offset,
-	swp_entry_t entry, struct page *page)
+	swp_entry_t entry, struct page *page, struct page_beancounter **ppbs)
 {
 	pte_t * pte;
 	unsigned long end;
@@ -465,7 +536,8 @@ static unsigned long unuse_pmd(struct vm
 		 * Test inline before going to call unuse_pte.
 		 */
 		if (unlikely(pte_same(*pte, swp_pte))) {
-			unuse_pte(vma, offset + address, pte, entry, page);
+			unuse_pte(vma, offset + address, pte, entry, page,
+					ppbs);
 			pte_unmap(pte);
 
 			/*
@@ -486,8 +558,8 @@ static unsigned long unuse_pmd(struct vm
 
 /* vma->vm_mm->page_table_lock is held */
 static unsigned long unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
-	unsigned long address, unsigned long size,
-	swp_entry_t entry, struct page *page)
+	unsigned long address, unsigned long size, swp_entry_t entry,
+	struct page *page, struct page_beancounter **ppbs)
 {
 	pmd_t * pmd;
 	unsigned long offset, end;
@@ -510,7 +582,7 @@ static unsigned long unuse_pgd(struct vm
 		BUG();
 	do {
 		foundaddr = unuse_pmd(vma, pmd, address, end - address,
-						offset, entry, page);
+				offset, entry, page, ppbs);
 		if (foundaddr)
 			return foundaddr;
 		address = (address + PMD_SIZE) & PMD_MASK;
@@ -521,7 +593,7 @@ static unsigned long unuse_pgd(struct vm
 
 /* vma->vm_mm->page_table_lock is held */
 static unsigned long unuse_vma(struct vm_area_struct * vma,
-	swp_entry_t entry, struct page *page)
+	swp_entry_t entry, struct page *page, struct page_beancounter **ppbs)
 {
 	pgd_t *pgdir;
 	unsigned long start, end;
@@ -540,7 +612,7 @@ static unsigned long unuse_vma(struct vm
 	pgdir = pgd_offset(vma->vm_mm, start);
 	do {
 		foundaddr = unuse_pgd(vma, pgdir, start, end - start,
-						entry, page);
+						entry, page, ppbs);
 		if (foundaddr)
 			return foundaddr;
 		start = (start + PGDIR_SIZE) & PGDIR_MASK;
@@ -550,7 +622,8 @@ static unsigned long unuse_vma(struct vm
 }
 
 static int unuse_process(struct mm_struct * mm,
-			swp_entry_t entry, struct page* page)
+			swp_entry_t entry, struct page* page,
+			struct page_beancounter **ppbs)
 {
 	struct vm_area_struct* vma;
 	unsigned long foundaddr = 0;
@@ -570,7 +643,7 @@ static int unuse_process(struct mm_struc
 	spin_lock(&mm->page_table_lock);
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (vma->anon_vma) {
-			foundaddr = unuse_vma(vma, entry, page);
+			foundaddr = unuse_vma(vma, entry, page, ppbs);
 			if (foundaddr)
 				break;
 		}
@@ -638,6 +711,7 @@ static int try_to_unuse(unsigned int typ
 	int retval = 0;
 	int reset_overflow = 0;
 	int shmem;
+	struct page_beancounter *pb_list;
 
 	/*
 	 * When searching mms for an entry, a good strategy is to
@@ -696,6 +770,13 @@ static int try_to_unuse(unsigned int typ
 			break;
 		}
 
+		pb_list = NULL;
+		if (pb_reserve_all(&pb_list)) {
+			page_cache_release(page);
+			retval = -ENOMEM;
+			break;
+		}
+
 		/*
 		 * Don't hold on to start_mm if it looks like exiting.
 		 */
@@ -718,6 +799,20 @@ static int try_to_unuse(unsigned int typ
 		lock_page(page);
 		wait_on_page_writeback(page);
 
+		/* If the read failed, we cannot map a not-uptodate page
+		 * into user space. Actually, we are in serious trouble:
+		 * we do not even know which process to kill. So, the only
+		 * remaining option is to stop swapoff() and allow someone
+		 * to kill processes in order to zap the invalid pages.
+		 */
+		if (unlikely(!PageUptodate(page))) {
+			pb_free_list(&pb_list);
+			unlock_page(page);
+			page_cache_release(page);
+			retval = -EIO;
+			break;
+		}
+
 		/*
 		 * Remove all references to entry, without blocking.
 		 * Whenever we reach init_mm, there's no address space
@@ -729,8 +824,10 @@ static int try_to_unuse(unsigned int typ
 			if (start_mm == &init_mm)
 				shmem = shmem_unuse(entry, page);
 			else
-				retval = unuse_process(start_mm, entry, page);
+				retval = unuse_process(start_mm, entry, page,
+						&pb_list);
 		}
+
 		if (*swap_map > 1) {
 			int set_start_mm = (*swap_map >= swcount);
 			struct list_head *p = &start_mm->mmlist;
@@ -758,7 +855,8 @@ static int try_to_unuse(unsigned int typ
 					set_start_mm = 1;
 					shmem = shmem_unuse(entry, page);
 				} else
-					retval = unuse_process(mm, entry, page);
+					retval = unuse_process(mm, entry, page,
+							&pb_list);
 				if (set_start_mm && *swap_map < swcount) {
 					mmput(new_start_mm);
 					atomic_inc(&mm->mm_users);
@@ -772,6 +870,8 @@ static int try_to_unuse(unsigned int typ
 			mmput(start_mm);
 			start_mm = new_start_mm;
 		}
+
+		pb_free_list(&pb_list);
 		if (retval) {
 			unlock_page(page);
 			page_cache_release(page);
@@ -1087,6 +1187,7 @@ asmlinkage long sys_swapoff(const char _
 {
 	struct swap_info_struct * p = NULL;
 	unsigned short *swap_map;
+	struct user_beancounter **owner_map;
 	struct file *swap_file, *victim;
 	struct address_space *mapping;
 	struct inode *inode;
@@ -1094,6 +1195,10 @@ asmlinkage long sys_swapoff(const char _
 	int i, type, prev;
 	int err;
 	
+	/* The VE admin check is just to be on the safe side: the admin may
+	 * affect swaps only if he has access to the special (device) file,
+	 * i.e. if he has been granted access to the block device or if the
+	 * swap file resides in an area visible to him. */
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
@@ -1177,12 +1282,15 @@ asmlinkage long sys_swapoff(const char _
 	p->max = 0;
 	swap_map = p->swap_map;
 	p->swap_map = NULL;
+	owner_map = p->owner_map;
+	p->owner_map = NULL;
 	p->flags = 0;
 	destroy_swap_extents(p);
 	swap_device_unlock(p);
 	swap_list_unlock();
 	up(&swapon_sem);
 	vfree(swap_map);
+	vfree(owner_map);
 	inode = mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
 		struct block_device *bdev = I_BDEV(inode);
@@ -1201,6 +1309,7 @@ out_dput:
 out:
 	return err;
 }
+EXPORT_SYMBOL(sys_swapoff);
 
 #ifdef CONFIG_PROC_FS
 /* iterator */
@@ -1319,6 +1428,7 @@ asmlinkage long sys_swapon(const char __
 	struct page *page = NULL;
 	struct inode *inode = NULL;
 	int did_down = 0;
+	struct user_beancounter **owner_map;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -1356,6 +1466,7 @@ asmlinkage long sys_swapon(const char __
 	p->highest_bit = 0;
 	p->cluster_nr = 0;
 	p->inuse_pages = 0;
+	p->owner_map = NULL;
 	p->sdev_lock = SPIN_LOCK_UNLOCKED;
 	p->next = -1;
 	if (swap_flags & SWAP_FLAG_PREFER) {
@@ -1522,6 +1633,15 @@ asmlinkage long sys_swapon(const char __
 		error = -EINVAL;
 		goto bad_swap;
 	}
+#if CONFIG_USER_SWAP_ACCOUNTING
+	p->owner_map = vmalloc(maxpages * sizeof(struct user_beancounter *));
+	if (!p->owner_map) {
+		error = -ENOMEM;
+		goto bad_swap;
+	}
+	memset(p->owner_map, 0,
+			maxpages * sizeof(struct user_beancounter *));
+#endif
 	p->swap_map[0] = SWAP_MAP_BAD;
 	p->max = maxpages;
 	p->pages = nr_good_pages;
@@ -1534,6 +1654,8 @@ asmlinkage long sys_swapon(const char __
 	swap_list_lock();
 	swap_device_lock(p);
 	p->flags = SWP_ACTIVE;
+	if (swap_flags & SWAP_FLAG_READONLY)
+		p->flags |= SWP_READONLY;
 	nr_swap_pages += nr_good_pages;
 	total_swap_pages += nr_good_pages;
 	printk(KERN_INFO "Adding %dk swap on %s.  Priority:%d extents:%d\n",
@@ -1567,6 +1689,7 @@ bad_swap:
 bad_swap_2:
 	swap_list_lock();
 	swap_map = p->swap_map;
+	owner_map = p->owner_map;
 	p->swap_file = NULL;
 	p->swap_map = NULL;
 	p->flags = 0;
@@ -1576,6 +1699,8 @@ bad_swap_2:
 	destroy_swap_extents(p);
 	if (swap_map)
 		vfree(swap_map);
+	if (owner_map)
+		vfree(owner_map);
 	if (swap_file)
 		filp_close(swap_file, NULL);
 out:
@@ -1592,6 +1717,7 @@ out:
 	}
 	return error;
 }
+EXPORT_SYMBOL(sys_swapon);
 
 void si_swapinfo(struct sysinfo *val)
 {
@@ -1648,6 +1774,7 @@ bad_file:
 	printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
 	goto out;
 }
+EXPORT_SYMBOL(swap_duplicate);
 
 struct swap_info_struct *
 get_swap_info_struct(unsigned type)
@@ -1663,10 +1790,20 @@ int valid_swaphandles(swp_entry_t entry,
 {
 	int ret = 0, i = 1 << page_cluster;
 	unsigned long toff;
-	struct swap_info_struct *swapdev = swp_type(entry) + swap_info;
+	struct swap_info_struct *swapdev;
+	unsigned type;
 
 	if (!page_cluster)	/* no readahead */
 		return 0;
+
+	type = swp_type(entry);
+	swapdev = type + swap_info;
+	if (type >= nr_swapfiles) {
+		printk("Bad swap entry: type %d, entry %lx, swp.flags %x, bdev %p\n",
+				type, entry.val, swapdev->flags, swapdev->bdev);
+		return 0;
+	}
+
 	toff = (swp_offset(entry) >> page_cluster) << page_cluster;
 	if (!toff)		/* first page is swap header */
 		toff++, i--;
diff -Nurap linux-2.6.9-100.orig/mm/vmalloc.c linux-2.6.9-ve023stab054/mm/vmalloc.c
--- linux-2.6.9-100.orig/mm/vmalloc.c	2011-06-09 19:22:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/vmalloc.c	2011-06-15 19:26:19.000000000 +0400
@@ -19,6 +19,7 @@
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_debug.h>
 
 rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
 struct vm_struct *vmlist;
@@ -267,6 +268,66 @@ struct vm_struct *get_vm_area(unsigned l
 	return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
 }
 
+struct vm_struct * get_vm_area_best(unsigned long size, unsigned long flags)
+{
+	unsigned long addr, best_addr, delta, best_delta;
+	struct vm_struct **p, **best_p, *tmp, *area;
+
+	area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		return NULL;
+
+	size += PAGE_SIZE; /* one-page gap at the end */
+	addr = VMALLOC_START;
+	best_addr = 0UL;
+	best_p = NULL;
+	best_delta = PAGE_ALIGN(VMALLOC_END) - VMALLOC_START;
+
+	write_lock(&vmlist_lock);
+	for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
+		if ((size + addr) < addr)
+			break;
+		delta = (unsigned long) tmp->addr - (size + addr);
+		if (delta < best_delta) {
+			best_delta = delta;
+			best_addr = addr;
+			best_p = p;
+		}
+		addr = tmp->size + (unsigned long) tmp->addr;
+		if (addr > VMALLOC_END-size)
+			break;
+	}
+
+	if (!tmp) {
+		/* check free area after list end */
+		delta = (unsigned long) PAGE_ALIGN(VMALLOC_END) - (size + addr);
+		if (delta < best_delta) {
+			best_delta = delta;
+			best_addr = addr;
+			best_p = p;
+		}
+	}
+	if (best_addr) {
+		area->flags = flags;
+		/* allocate at the end of this area */
+		area->addr = (void *)(best_addr + best_delta);
+		area->size = size;
+		area->next = *best_p;
+		area->pages = NULL;
+		area->nr_pages = 0;
+		area->phys_addr = 0;
+		*best_p = area;
+		/* check like in __vunmap */
+		WARN_ON((PAGE_SIZE - 1) & (unsigned long)area->addr);
+	} else {
+		kfree(area);
+		area = NULL;
+	}
+	write_unlock(&vmlist_lock);
+
+	return area;
+}
+
 /**
  *	remove_vm_area  -  find and remove a contingous kernel virtual area
  *
@@ -319,6 +380,7 @@ void __vunmap(void *addr, int deallocate
 	if (deallocate_pages) {
 		int i;
 
+		dec_vmalloc_charged(area);
 		for (i = 0; i < area->nr_pages; i++) {
 			if (unlikely(!area->pages[i]))
 				BUG();
@@ -414,17 +476,20 @@ EXPORT_SYMBOL(vmap);
  *	allocator with @gfp_mask flags.  Map them into contiguous
  *	kernel virtual space, using a pagetable protection of @prot.
  */
-void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
+void *____vmalloc(unsigned long size, int gfp_mask, pgprot_t prot, int best)
 {
 	struct vm_struct *area;
 	struct page **pages;
-	unsigned int nr_pages, array_size, i;
+	unsigned int nr_pages, array_size, i, j;
 
 	size = PAGE_ALIGN(size);
 	if (!size || (size >> PAGE_SHIFT) > num_physpages)
 		return NULL;
 
-	area = get_vm_area(size, VM_ALLOC);
+	if (best)
+		area = get_vm_area_best(size, VM_ALLOC);
+	else
+		area = get_vm_area(size, VM_ALLOC);
 	if (!area)
 		return NULL;
 
@@ -437,31 +502,41 @@ void *__vmalloc(unsigned long size, int 
 	else
 		pages = kmalloc(array_size, (gfp_mask & ~__GFP_HIGHMEM));
 	area->pages = pages;
-	if (!area->pages) {
-		remove_vm_area(area->addr);
-		kfree(area);
-		return NULL;
-	}
+	if (!area->pages)
+		goto fail_area;
 	memset(area->pages, 0, array_size);
 
 	for (i = 0; i < area->nr_pages; i++) {
 		area->pages[i] = alloc_page(gfp_mask);
-		if (unlikely(!area->pages[i])) {
-			/* Successfully allocated i pages, free them in __vunmap() */
-			area->nr_pages = i;
+		if (unlikely(!area->pages[i]))
 			goto fail;
-		}
 	}
 	
 	if (map_vm_area(area, prot, &pages))
 		goto fail;
+
+	inc_vmalloc_charged(area, gfp_mask);
 	return area->addr;
 
 fail:
-	vfree(area->addr);
+	for (j = 0; j < i; j++)
+		__free_page(area->pages[j]);
+	if (array_size > PAGE_SIZE)
+		vfree(area->pages);
+	else
+		kfree(area->pages);
+fail_area:
+	remove_vm_area(area->addr);
+	kfree(area);
+	
 	return NULL;
 }
 
+void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
+{
+	return ____vmalloc(size, gfp_mask, prot, 0);
+}
+
 EXPORT_SYMBOL(__vmalloc);
 
 /**
@@ -482,6 +557,20 @@ void *vmalloc(unsigned long size)
 
 EXPORT_SYMBOL(vmalloc);
 
+void *vmalloc_best(unsigned long size)
+{
+       return ____vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, 1);
+}
+
+EXPORT_SYMBOL(vmalloc_best);
+
+void *ub_vmalloc_best(unsigned long size)
+{
+       return ____vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL, 1);
+}
+
+EXPORT_SYMBOL(ub_vmalloc_best);
+
 /**
  *	vmalloc_exec  -  allocate virtually contiguous, executable memory
  *
@@ -593,3 +682,37 @@ finished:
 	read_unlock(&vmlist_lock);
 	return buf - buf_start;
 }
+
+void vprintstat(void)
+{
+	struct vm_struct *p, *last_p = NULL;
+	unsigned long addr, size, free_size, max_free_size;
+	int num;
+
+	addr = VMALLOC_START;
+	size = max_free_size = 0;
+	num = 0;
+
+	read_lock(&vmlist_lock);
+	for (p = vmlist; p; p = p->next) {
+		free_size = (unsigned long)p->addr - addr;
+		if (free_size > max_free_size)
+			max_free_size = free_size;
+		addr = (unsigned long)p->addr + p->size;
+		size += p->size;
+		++num;
+		last_p = p;		
+	}
+	if (last_p) {
+		free_size = VMALLOC_END -
+			((unsigned long)last_p->addr + last_p->size);
+		if (free_size > max_free_size)
+			max_free_size = free_size;
+	}
+	read_unlock(&vmlist_lock);
+
+	printk("VMALLOC Used: %luKB Total: %luKB Entries: %d\n"
+	       "    Max_Free: %luKB Start: %lx End: %lx\n",
+	       size/1024, (VMALLOC_END - VMALLOC_START)/1024, num,
+	       max_free_size/1024, VMALLOC_START, VMALLOC_END);
+}
diff -Nurap linux-2.6.9-100.orig/mm/vmscan.c linux-2.6.9-ve023stab054/mm/vmscan.c
--- linux-2.6.9-100.orig/mm/vmscan.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/mm/vmscan.c	2011-06-15 19:26:22.000000000 +0400
@@ -39,6 +39,8 @@
 
 #include <linux/swapops.h>
 
+#include <ub/ub_mem.h>
+
 /* possible outcome of pageout() */
 typedef enum {
 	/* failed to write page out, page is locked */
@@ -83,6 +85,8 @@ struct scan_control {
 	unsigned int nr_ios;
 
 	int order;			/* order passed in */
+
+	struct oom_freeing_stat oom_stat;
 };
 
 /*
@@ -191,10 +195,11 @@ EXPORT_SYMBOL(remove_shrinker);
  * are eligible for the caller's allocation attempt.  It is used for balancing
  * slab reclaim versus page reclaim.
  */
-int shrink_slab(unsigned long scanned, unsigned int gfp_mask,
+int shrink_slab_stat(struct scan_control *sc, unsigned int gfp_mask,
 			unsigned long lru_pages)
 {
 	struct shrinker *shrinker;
+	unsigned long scanned = sc->nr_scanned;
 
 	if (scanned == 0)
 		scanned = SWAP_CLUSTER_MAX;
@@ -223,6 +228,7 @@ int shrink_slab(unsigned long scanned, u
 			shrink_ret = (*shrinker->shrinker)(this_scan, gfp_mask);
 			if (shrink_ret == -1)
 				break;
+			sc->oom_stat.slabs += shrink_ret;
 			mod_page_state(slabs_scanned, this_scan);
 			total_scan -= this_scan;
 
@@ -235,6 +241,15 @@ int shrink_slab(unsigned long scanned, u
 	return 0;
 }
 
+int shrink_slab(unsigned long scanned, unsigned int gfp_mask,
+			unsigned long lru_pages)
+{
+	struct scan_control sc;
+
+	sc.nr_scanned = scanned;
+	return shrink_slab_stat(&sc, gfp_mask, lru_pages);
+}
+
 /* Called without lock on whether page is mapped, so answer is unstable */
 static inline int page_mapping_inuse(struct page *page)
 {
@@ -408,6 +423,7 @@ static int shrink_list(struct list_head 
 		if (PageAnon(page) && !PageSwapCache(page)) {
 			if (!add_to_swap(page))
 				goto activate_locked;
+			sc->oom_stat.swapped++;
 		}
 #endif /* CONFIG_SWAP */
 
@@ -448,6 +464,7 @@ static int shrink_list(struct list_head 
 			case PAGE_ACTIVATE:
 				goto activate_locked;
 			case PAGE_SUCCESS:
+				sc->oom_stat.written++;
 				if (PageWriteback(page) || PageDirty(page)) {
 					sc->nr_ios++;
 					goto keep;
@@ -611,6 +628,7 @@ static void shrink_cache(struct zone *zo
 		else
 			mod_page_state_zone(zone, pgscan_direct, nr_scan);
 		nr_freed = shrink_list(&page_list, sc);
+		sc->oom_stat.freed += nr_freed;
 		if (current_is_kswapd())
 			mod_page_state(kswapd_steal, nr_freed);
 		mod_page_state_zone(zone, pgsteal, nr_freed);
@@ -675,6 +693,7 @@ refill_inactive_zone(struct zone *zone, 
 	long distress;
 	long swap_tendency;
 
+	KSTAT_PERF_ENTER(refill_inact)
 	lru_add_drain();
 	pgmoved = 0;
 	spin_lock_irq(&zone->lru_lock);
@@ -815,6 +834,8 @@ refill_inactive_zone(struct zone *zone, 
 
 	mod_page_state_zone(zone, pgrefill, pgscanned);
 	mod_page_state(pgdeactivate, pgdeactivate);
+
+	KSTAT_PERF_LEAVE(refill_inact);
 }
 
 /*
@@ -847,7 +868,7 @@ shrink_zone(struct zone *zone, struct sc
 	sc->nr_to_reclaim = SWAP_CLUSTER_MAX;
 
 	while (nr_active || nr_inactive) {
-		if (current->flags & PF_MEMDIE)
+		if (test_tsk_thread_flag(current, TIF_MEMDIE))
 			break;
 		/* stop after we are way above pages_high, someone might have exited */
 		if ((zone->free_pages > zone->pages_high*2) && !sc->order)
@@ -904,7 +925,7 @@ shrink_caches(struct zone **zones, struc
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *zone = zones[i];
 
-		if (current->flags & PF_MEMDIE)
+		if (test_tsk_thread_flag(current, TIF_MEMDIE))
 			return NULL;
 
 		if (zone->present_pages == 0)
@@ -963,6 +984,10 @@ int try_to_free_pages(struct zone **zone
 	unsigned long lru_pages = 0;
 	int i;
 
+	KSTAT_PERF_ENTER(ttfp);
+
+	memset(&sc.oom_stat, 0, sizeof(struct oom_freeing_stat));
+	sc.oom_stat.oom_generation = oom_generation;
 	sc.gfp_mask = gfp_mask;
 	sc.may_writepage = 0;
 	sc.order = order;
@@ -984,11 +1009,11 @@ int try_to_free_pages(struct zone **zone
 		sc.priority = priority;
 		sc.nr_ios = 0;
 
-		if (current->flags & PF_MEMDIE)
+		if (test_tsk_thread_flag(current, TIF_MEMDIE))
 			goto out;
 
 		shrink_caches(zones, &sc);
-		shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
+		shrink_slab_stat(&sc, gfp_mask, lru_pages);
 		if (reclaim_state) {
 			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			reclaim_state->reclaimed_slab = 0;
@@ -1019,10 +1044,11 @@ int try_to_free_pages(struct zone **zone
 	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY) &&
 	     !total_reclaimed && sc.nr_congested < SWAP_CLUSTER_MAX &&
 	     free_below_min(zones, gfp_mask, can_try_harder, alloc_type, order))
-		out_of_memory(gfp_mask);
+		out_of_memory(&sc.oom_stat, gfp_mask);
 out:
 	for (i = 0; zones[i] != 0; i++)
 		zones[i]->prev_priority = zones[i]->temp_priority;
+	KSTAT_PERF_LEAVE(ttfp);
 	return ret;
 }
 
@@ -1148,7 +1174,7 @@ scan:
 			sc.priority = priority;
 			shrink_zone(zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
-			shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages);
+			shrink_slab_stat(&sc, GFP_KERNEL, lru_pages);
 			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_reclaimed += sc.nr_reclaimed;
 			total_scanned += sc.nr_scanned;
@@ -1244,8 +1270,8 @@ static int kswapd(void *p)
 	tsk->flags |= PF_MEMALLOC|PF_KSWAPD;
 
 	for ( ; ; ) {
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
 		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
 		schedule();
 		finish_wait(&pgdat->kswapd_wait, &wait);
@@ -1327,7 +1353,7 @@ static int __init kswapd_init(void)
 	swap_setup();
 	for_each_pgdat(pgdat)
 		pgdat->kswapd
-		= find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
+		= find_task_by_pid_all(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
 	total_memory = nr_free_pagecache_pages();
 	hotcpu_notifier(cpu_callback, 0);
 	return 0;
diff -Nurap linux-2.6.9-100.orig/net/compat.c linux-2.6.9-ve023stab054/net/compat.c
--- linux-2.6.9-100.orig/net/compat.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/compat.c	2011-06-15 19:26:22.000000000 +0400
@@ -310,107 +310,6 @@ void scm_detach_fds_compat(struct msghdr
 }
 
 /*
- * For now, we assume that the compatibility and native version
- * of struct ipt_entry are the same - sfr.  FIXME
- */
-struct compat_ipt_replace {
-	char			name[IPT_TABLE_MAXNAMELEN];
-	u32			valid_hooks;
-	u32			num_entries;
-	u32			size;
-	u32			hook_entry[NF_IP_NUMHOOKS];
-	u32			underflow[NF_IP_NUMHOOKS];
-	u32			num_counters;
-	compat_uptr_t		counters;	/* struct ipt_counters * */
-	struct ipt_entry	entries[0];
-};
-
-static int do_netfilter_replace(int fd, int level, int optname,
-				char __user *optval, int optlen)
-{
-	struct compat_ipt_replace __user *urepl;
-	struct ipt_replace __user *repl_nat;
-	char name[IPT_TABLE_MAXNAMELEN];
-	u32 origsize, tmp32, num_counters;
-	unsigned int repl_nat_size;
-	int ret;
-	int i;
-	compat_uptr_t ucntrs;
-
-	urepl = (struct compat_ipt_replace __user *)optval;
-	if (get_user(origsize, &urepl->size))
-		return -EFAULT;
-
-	/* Hack: Causes ipchains to give correct error msg --RR */
-	if (optlen != sizeof(*urepl) + origsize)
-		return -ENOPROTOOPT;
-
-	/* XXX Assumes that size of ipt_entry is the same both in
-	 *     native and compat environments.
-	 */
-	repl_nat_size = sizeof(*repl_nat) + origsize;
-	repl_nat = compat_alloc_user_space(repl_nat_size);
-
-	ret = -EFAULT;
-	if (put_user(origsize, &repl_nat->size))
-		goto out;
-
-	if (!access_ok(VERIFY_READ, urepl, optlen) ||
-	    !access_ok(VERIFY_WRITE, repl_nat, optlen))
-		goto out;
-
-	if (__copy_from_user(name, urepl->name, sizeof(urepl->name)) ||
-	    __copy_to_user(repl_nat->name, name, sizeof(repl_nat->name)))
-		goto out;
-
-	if (__get_user(tmp32, &urepl->valid_hooks) ||
-	    __put_user(tmp32, &repl_nat->valid_hooks))
-		goto out;
-
-	if (__get_user(tmp32, &urepl->num_entries) ||
-	    __put_user(tmp32, &repl_nat->num_entries))
-		goto out;
-
-	if (__get_user(num_counters, &urepl->num_counters) ||
-	    __put_user(num_counters, &repl_nat->num_counters))
-		goto out;
-
-	if (__get_user(ucntrs, &urepl->counters) ||
-	    __put_user(compat_ptr(ucntrs), &repl_nat->counters))
-		goto out;
-
-	if (__copy_in_user(&repl_nat->entries[0],
-			   &urepl->entries[0],
-			   origsize))
-		goto out;
-
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
-		if (__get_user(tmp32, &urepl->hook_entry[i]) ||
-		    __put_user(tmp32, &repl_nat->hook_entry[i]) ||
-		    __get_user(tmp32, &urepl->underflow[i]) ||
-		    __put_user(tmp32, &repl_nat->underflow[i]))
-			goto out;
-	}
-
-	/*
-	 * Since struct ipt_counters just contains two u_int64_t members
-	 * we can just do the access_ok check here and pass the (converted)
-	 * pointer into the standard syscall.  We hope that the pointer is
-	 * not misaligned ...
-	 */
-	if (!access_ok(VERIFY_WRITE, compat_ptr(ucntrs),
-		       num_counters * sizeof(struct ipt_counters)))
-		goto out;
-
-
-	ret = sys_setsockopt(fd, level, optname,
-			     (char __user *)repl_nat, repl_nat_size);
-
-out:
-	return ret;
-}
-
-/*
  * A struct sock_filter is architecture independent.
  */
 struct compat_sock_fprog {
@@ -462,10 +361,6 @@ static int do_set_sock_timeout(int fd, i
 asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
 				char __user *optval, int optlen)
 {
-	/* SO_SET_REPLACE seems to be the same in all levels */
-	if (optname == IPT_SO_SET_REPLACE)
-		return do_netfilter_replace(fd, level, optname,
-					    optval, optlen);
 	if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER)
 		return do_set_attach_filter(fd, level, optname,
 					    optval, optlen);
diff -Nurap linux-2.6.9-100.orig/net/core/datagram.c linux-2.6.9-ve023stab054/net/core/datagram.c
--- linux-2.6.9-100.orig/net/core/datagram.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/datagram.c	2011-06-15 19:26:19.000000000 +0400
@@ -54,6 +54,8 @@
 #include <net/sock.h>
 #include <net/checksum.h>
 
+#include <ub/ub_net.h>
+
 
 /*
  *	Is a socket 'connection oriented' ?
@@ -454,6 +456,7 @@ unsigned int datagram_poll(struct file *
 {
 	struct sock *sk = sock->sk;
 	unsigned int mask;
+	int no_ubc_space;
 
 	sock_poll_wait(file, sk->sk_sleep, wait);
 	mask = 0;
@@ -461,8 +464,14 @@ unsigned int datagram_poll(struct file *
 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
 		mask |= POLLERR;
-	if (sk->sk_shutdown == SHUTDOWN_MASK)
+	if (sk->sk_shutdown == SHUTDOWN_MASK) {
+		no_ubc_space = 0;
 		mask |= POLLHUP;
+	} else {
+		no_ubc_space = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
+		if (no_ubc_space)
+			ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
+	}
 
 	/* readable? */
 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
@@ -479,7 +488,7 @@ unsigned int datagram_poll(struct file *
 	}
 
 	/* writable? */
-	if (sock_writeable(sk))
+	if (!no_ubc_space && sock_writeable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
diff -Nurap linux-2.6.9-100.orig/net/core/dev.c linux-2.6.9-ve023stab054/net/core/dev.c
--- linux-2.6.9-100.orig/net/core/dev.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/dev.c	2011-06-15 19:26:22.000000000 +0400
@@ -113,6 +113,7 @@
 #include <net/iw_handler.h>
 #endif	/* CONFIG_NET_RADIO */
 #include <asm/current.h>
+#include <ub/beancounter.h>
 
 /* This define, if set, will randomly drop a packet when congestion
  * is more than moderate.  It helps fairness in the multi-interface
@@ -188,25 +189,40 @@ static struct timer_list samp_timer = TI
  * unregister_netdevice(), which must be called with the rtnl
  * semaphore held.
  */
+#if defined(CONFIG_VE)
+#define dev_tail	(get_exec_env()->_net_dev_tail)
+#else
 struct net_device *dev_base;
 struct net_device **dev_tail = &dev_base;
-rwlock_t dev_base_lock = RW_LOCK_UNLOCKED;
-
 EXPORT_SYMBOL(dev_base);
+#endif
+
+rwlock_t dev_base_lock = RW_LOCK_UNLOCKED;
 EXPORT_SYMBOL(dev_base_lock);
 
+#ifdef CONFIG_VE
+#define MAX_UNMOVABLE_NETDEVICES (8*4096)
+static uint8_t unmovable_ifindex_list[MAX_UNMOVABLE_NETDEVICES/8];
+static LIST_HEAD(dev_global_list);
+#endif
+
 #define NETDEV_HASHBITS	8
 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
 
-static inline struct hlist_head *dev_name_hash(const char *name)
+struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env)
 {
-	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+	unsigned hash;
+	if (!ve_is_super(env))
+		return visible_dev_head(env);
+	hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
 }
 
-static inline struct hlist_head *dev_index_hash(int ifindex)
+struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env)
 {
+	if (!ve_is_super(env))
+		return visible_dev_index_head(env);
 	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
 }
 
@@ -494,7 +510,7 @@ struct net_device *__dev_get_by_name(con
 {
 	struct hlist_node *p;
 
-	hlist_for_each(p, dev_name_hash(name)) {
+	hlist_for_each(p, dev_name_hash(name, get_exec_env())) {
 		struct net_device *dev
 			= hlist_entry(p, struct net_device, name_hlist);
 		if (!strncmp(dev->name, name, IFNAMSIZ))
@@ -527,6 +543,28 @@ struct net_device *dev_get_by_name(const
 }
 
 /**
+ *	__dev_global_get_by_name - find a device by its name in dev_global_list
+ *	@name: name to find
+ *
+ *	Find an interface by name. Must be called under RTNL semaphore
+ *	If the name is found a pointer to the device
+ *	is returned. If the name is not found then %NULL is returned. The
+ *	reference counters are not incremented so the caller must be
+ *	careful with locks.
+ */
+
+struct net_device *__dev_global_get_by_name(const char *name)
+{
+	struct net_device *dev;
+	/* It's called relatively rarely */
+	list_for_each_entry(dev, &dev_global_list, dev_global_list_entry) {
+		if (strncmp(dev->name, name, IFNAMSIZ) == 0)
+			return dev;
+	}
+	return NULL;
+}
+
+/**
  *	__dev_get_by_index - find a device by its ifindex
  *	@ifindex: index of device
  *
@@ -541,7 +579,7 @@ struct net_device *__dev_get_by_index(in
 {
 	struct hlist_node *p;
 
-	hlist_for_each(p, dev_index_hash(ifindex)) {
+	hlist_for_each(p, dev_index_hash(ifindex, get_exec_env())) {
 		struct net_device *dev
 			= hlist_entry(p, struct net_device, index_hlist);
 		if (dev->ifindex == ifindex)
@@ -670,6 +708,23 @@ int dev_valid_name(const char *name)
  *	of the unit assigned or a negative errno code.
  */
 
+static inline void __dev_check_name(const char *dev_name, const char *name, 
+	long *inuse, const int max_netdevices)
+{
+	int i = 0;
+	char buf[IFNAMSIZ];
+
+	if (!sscanf(dev_name, name, &i))
+		return;
+	if (i < 0 || i >= max_netdevices)
+		return;
+
+	/* avoid cases where sscanf is not exact inverse of printf */
+	snprintf(buf, sizeof(buf), name, i);
+	if (!strncmp(buf, dev_name, IFNAMSIZ))
+		set_bit(i, inuse);
+}
+
 int dev_alloc_name(struct net_device *dev, const char *name)
 {
 	int i = 0;
@@ -694,16 +749,18 @@ int dev_alloc_name(struct net_device *de
 		if (!inuse)
 			return -ENOMEM;
 
-		for (d = dev_base; d; d = d->next) {
-			if (!sscanf(d->name, name, &i))
-				continue;
-			if (i < 0 || i >= max_netdevices)
-				continue;
-
-			/*  avoid cases where sscanf is not exact inverse of printf */
-			snprintf(buf, sizeof(buf), name, i);
-			if (!strncmp(buf, d->name, IFNAMSIZ))
-				set_bit(i, inuse);
+		if (ve_is_super(get_exec_env())) {
+			list_for_each_entry(d, &dev_global_list, 
+					dev_global_list_entry) {
+				__dev_check_name(d->name, name, inuse, 
+					max_netdevices);
+			}
+		}
+		else {
+			for (d = dev_base; d; d = d->next) {
+				__dev_check_name(d->name, name, inuse, 
+					max_netdevices);
+			}
 		}
 
 		i = find_first_zero_bit(inuse, max_netdevices);
@@ -711,7 +768,11 @@ int dev_alloc_name(struct net_device *de
 	}
 
 	snprintf(buf, sizeof(buf), name, i);
-	if (!__dev_get_by_name(buf)) {
+	if (ve_is_super(get_exec_env()))
+		d = __dev_global_get_by_name(buf);
+	else
+		d = __dev_get_by_name(buf);
+	if (d == NULL) {
 		strlcpy(dev->name, buf, IFNAMSIZ);
 		return i;
 	}
@@ -744,13 +805,15 @@ int dev_change_name(struct net_device *d
 	if (!dev_valid_name(newname))
 		return -EINVAL;
 
+	/* Rename of devices in VE is prohibited by CAP_NET_ADMIN */
+
 	if (strchr(newname, '%')) {
 		err = dev_alloc_name(dev, newname);
 		if (err < 0)
 			return err;
 		strcpy(newname, dev->name);
 	}
-	else if (__dev_get_by_name(newname))
+	else if (__dev_global_get_by_name(newname))
 		return -EEXIST;
 	else
 		strlcpy(dev->name, newname, IFNAMSIZ);
@@ -758,7 +821,8 @@ int dev_change_name(struct net_device *d
 	err = class_device_rename(&dev->class_dev, dev->name);
 	if (!err) {
 		hlist_del(&dev->name_hlist);
-		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name, 
+							get_exec_env()));
 		notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 	}
 
@@ -1292,6 +1356,34 @@ int dev_queue_xmit(struct sk_buff *skb)
 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 	if (q->enqueue) {
+#if 0
+		/*
+		 * XXX this code is broken:
+		 *  1) it is activated for normal devices in VE0,
+		 *  2) it doesn't use API functions like ub_skb_set_charge,
+		 *  3) it isn't allowed to charge skb as UB_OTHERSOCKBUF
+		 *     if its socket is TCP.
+		 */
+		struct user_beancounter *ub;
+
+		ub = netdev_bc(dev)->exec_ub;
+		/* the skb CAN be already charged if it transmitted via
+		 * something like bonding device */
+		if (ub && (skb_bc(skb)->resource == 0)) {
+			unsigned long chargesize;
+			chargesize = skb_charge_fullsize(skb);
+			if (charge_beancounter(ub, UB_OTHERSOCKBUF,
+						chargesize, UB_SOFT)) {
+				rcu_read_unlock();
+				rc = -ENOMEM;
+				goto out_kfree_skb;
+			}
+			skb_bc(skb)->ub = ub;
+			skb_bc(skb)->charged = chargesize;
+			skb_bc(skb)->resource = UB_OTHERSOCKBUF;
+		}
+#endif
+
 		/* Grab device queue */
 		spin_lock(&dev->queue_lock);
 
@@ -1643,6 +1735,7 @@ int netif_receive_skb(struct sk_buff *sk
 	struct net_device *null_or_bond;
 	int ret = NET_RX_DROP;
 	unsigned short type;
+	struct ve_struct *old_env;
 
 #ifdef CONFIG_NETPOLL
 	if (skb->dev->poll && netpoll_rx(skb))
@@ -1665,6 +1758,15 @@ int netif_receive_skb(struct sk_buff *sk
 	skb->h.raw = skb->nh.raw = skb->data;
 	skb->mac_len = skb->nh.raw - skb->mac.raw;
 
+	/*
+	 * The skb may have been allocated in a VE context other than the
+	 * one its device belongs to, so set the correct owner_env.
+	 */
+	skb->owner_env = skb->dev->owner_env;
+	BUG_ON(skb->owner_env == NULL);
+
+	old_env = set_exec_env(VE_OWNER_SKB(skb));
+
 	pt_prev = NULL;
 
 	rcu_read_lock();
@@ -1765,6 +1867,7 @@ ncls:
 
 out:
 	rcu_read_unlock();
+	(void)set_exec_env(old_env);
 	return ret;
 }
 
@@ -2161,7 +2264,8 @@ static int __init dev_proc_init(void)
 
 	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
 		goto out;
-	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
+	if (!__proc_net_fops_create("net/softnet_stat", S_IRUGO,
+				&softnet_seq_fops, NULL))
 		goto out_dev;
 	if (wireless_proc_init())
 		goto out_softnet;
@@ -2169,7 +2273,7 @@ static int __init dev_proc_init(void)
 out:
 	return rc;
 out_softnet:
-	proc_net_remove("softnet_stat");
+	__proc_net_remove("net/softnet_stat");
 out_dev:
 	proc_net_remove("dev");
 	goto out;
@@ -2235,9 +2339,12 @@ void dev_set_promiscuity(struct net_devi
 	dev->flags |= IFF_PROMISC;
 	if ((dev->promiscuity += inc) == 0)
 		dev->flags &= ~IFF_PROMISC;
+	/* Promiscuous mode on these devices does not mean anything */
+	if (dev->flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
+		return;
 	if (dev->flags ^ old_flags) {
 		dev_mc_upload(dev);
-		printk(KERN_INFO "device %s %s promiscuous mode\n",
+		ve_printk(VE_LOG, KERN_INFO "device %s %s promiscuous mode\n",
 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
 		       					       "left");
 	}
@@ -2644,9 +2751,18 @@ int dev_ioctl(unsigned int cmd, void __u
 		 *	- require strict serialization.
 		 *	- do not return a value
 		 */
+		case SIOCSIFMTU:
+			if (!capable(CAP_NET_ADMIN) &&
+			    !capable(CAP_VE_NET_ADMIN))
+				return -EPERM;
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ifsioc(&ifr, cmd);
+			rtnl_unlock();
+			return ret;
+		
 		case SIOCSIFFLAGS:
 		case SIOCSIFMETRIC:
-		case SIOCSIFMTU:
 		case SIOCSIFMAP:
 		case SIOCSIFHWADDR:
 		case SIOCSIFSLAVE:
@@ -2723,25 +2839,75 @@ int dev_ioctl(unsigned int cmd, void __u
 	}
 }
 
-
 /**
  *	dev_new_index	-	allocate an ifindex
  *
  *	Returns a suitable unique value for a new device interface
- *	number.  The caller must hold the rtnl semaphore or the
+ *	number. The caller must hold the rtnl semaphore or the
  *	dev_base_lock to be sure it remains unique.
+ *
+ *	Note: dev->name must be valid on entry
  */
-static int dev_new_index(void)
+static int dev_ve_new_index(void)
 {
-	static int ifindex;
+#ifdef CONFIG_VE
+	int *ifindex = &get_exec_env()->ifindex;
+	int delta = 2;
+#else
+	static int s_ifindex;
+	int *ifindex = &s_ifindex;
+	int delta = 1;
+#endif
 	for (;;) {
-		if (++ifindex <= 0)
-			ifindex = 1;
-		if (!__dev_get_by_index(ifindex))
-			return ifindex;
+		*ifindex += delta;
+		if (*ifindex <= 0)
+			*ifindex = 1;
+		if (!__dev_get_by_index(*ifindex))
+			return *ifindex;
 	}
 }
 
+static int dev_glb_new_index(void)
+{
+#ifdef CONFIG_VE
+	int i;
+
+	i = find_first_zero_bit((long*)unmovable_ifindex_list, 
+		MAX_UNMOVABLE_NETDEVICES);
+	
+	if (i == MAX_UNMOVABLE_NETDEVICES)
+		return -EMFILE;
+
+	__set_bit(i, (long*)unmovable_ifindex_list);
+	return (i + 1) * 2;
+#endif
+}
+
+static void dev_glb_free_index(struct net_device *dev)
+{
+#ifdef CONFIG_VE
+	int bit;
+
+	bit = dev->ifindex / 2 - 1;
+	BUG_ON(bit >= MAX_UNMOVABLE_NETDEVICES);
+	__clear_bit(bit, (long*)unmovable_ifindex_list);
+#endif
+}
+
+int dev_new_index(struct net_device *dev)
+{
+	if (ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
+		return dev_glb_new_index();
+
+	return dev_ve_new_index();
+}
+
+void dev_free_index(struct net_device *dev)
+{
+	if ((dev->ifindex % 2) == 0)
+		dev_glb_free_index(dev);
+}
+
 static int dev_boot_phase = 1;
 
 /* Delayed registration/unregisteration */
@@ -2755,6 +2921,12 @@ static inline void net_set_todo(struct n
 	spin_unlock(&net_todo_list_lock);
 }
 
+void netdevice_notify(int event, struct net_device *dev)
+{
+	notifier_call_chain(&netdev_chain, event, dev);
+}
+EXPORT_SYMBOL(netdevice_notify);
+
 /**
  *	register_netdevice	- register a network device
  *	@dev: device to register
@@ -2785,6 +2957,10 @@ int register_netdevice(struct net_device
 	/* When net_device's are persistent, this will be fatal. */
 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
 
+	ret = -EPERM;
+	if (!ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
+		goto out;
+
 	spin_lock_init(&dev->queue_lock);
 	spin_lock_init(&dev->xmit_lock);
 	dev->xmit_lock_owner = -1;
@@ -2804,27 +2980,32 @@ int register_netdevice(struct net_device
 		if (ret) {
 			if (ret > 0)
 				ret = -EIO;
-			goto out_err;
+			goto out_free_div;
 		}
 	}
  
 	if (!dev_valid_name(dev->name)) {
 		ret = -EINVAL;
-		goto out_err;
+		goto out_free_div;
+	}
+
+	dev->ifindex = dev_new_index(dev);
+	if (dev->ifindex < 0) {
+		ret = dev->ifindex;
+		goto out_free_div;
 	}
 
-	dev->ifindex = dev_new_index();
 	if (dev->iflink == -1)
 		dev->iflink = dev->ifindex;
 
 	/* Check for existence of name */
-	head = dev_name_hash(dev->name);
+	head = dev_name_hash(dev->name, get_exec_env());
 	hlist_for_each(p, head) {
 		struct net_device *d
 			= hlist_entry(p, struct net_device, name_hlist);
 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
 			ret = -EEXIST;
- 			goto out_err;
+ 			goto out_free_ind;
 		}
  	}
 
@@ -2854,12 +3035,18 @@ int register_netdevice(struct net_device
 	set_bit(__LINK_STATE_PRESENT, &dev->state);
 
 	dev->next = NULL;
+	dev->owner_env = get_exec_env();
+	netdev_bc(dev)->owner_ub = get_beancounter(get_exec_ub());
+	netdev_bc(dev)->exec_ub = get_beancounter(get_exec_ub());
 	dev_init_scheduler(dev);
+	if (ve_is_super(get_exec_env()))
+		list_add_tail(&dev->dev_global_list_entry, &dev_global_list);
 	write_lock_bh(&dev_base_lock);
 	*dev_tail = dev;
 	dev_tail = &dev->next;
 	hlist_add_head(&dev->name_hlist, head);
-	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
+	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, 
+						get_exec_env()));
 	dev_hold(dev);
 	dev->reg_state = NETREG_REGISTERING;
 	write_unlock_bh(&dev_base_lock);
@@ -2873,7 +3060,9 @@ int register_netdevice(struct net_device
 
 out:
 	return ret;
-out_err:
+out_free_ind:
+	dev_free_index(dev);
+out_free_div:
 	free_divert_blk(dev);
 	goto out;
 }
@@ -2891,6 +3080,7 @@ out_err:
  */
 static void netdev_wait_allrefs(struct net_device *dev)
 {
+	int i = 0;
 	unsigned long rebroadcast_time, warning_time;
 
 	rebroadcast_time = warning_time = jiffies;
@@ -2913,7 +3103,7 @@ static void netdev_wait_allrefs(struct n
 				linkwatch_run_queue();
 			}
 
-			rtnl_shunlock();
+			__rtnl_shunlock();
 
 			rebroadcast_time = jiffies;
 		}
@@ -2923,11 +3113,20 @@ static void netdev_wait_allrefs(struct n
 
 		if (time_after(jiffies, warning_time + 10 * HZ)) {
 			printk(KERN_EMERG "unregister_netdevice: "
-			       "waiting for %s to become free. Usage "
-			       "count = %d\n",
-			       dev->name, atomic_read(&dev->refcnt));
+			       "waiting for %s=%p to become free. Usage "
+			       "count = %d CT=%u\n",
+			       dev->name, dev, atomic_read(&dev->refcnt),
+			       VEID(get_exec_env()));
 			warning_time = jiffies;
 		}
+
+		if (++i > 200) {	/* give 50 seconds to try */
+			dev->is_leaked = 1;
+			printk(KERN_EMERG "unregister_netdevice: "
+			       "device %p marked to leak\n", dev);
+			break;
+		}
+
 	}
 }
 
@@ -2957,6 +3156,7 @@ void netdev_run_todo(void)
 {
 	struct list_head list = LIST_HEAD_INIT(list);
 	int err;
+	struct ve_struct *current_env;
 
 
 	/* Need to guard against multiple cpu's getting out of order. */
@@ -2975,32 +3175,46 @@ void netdev_run_todo(void)
 	list_splice_init(&net_todo_list, &list);
 	spin_unlock(&net_todo_list_lock);
 		
+	current_env = get_exec_env();
 	while (!list_empty(&list)) {
 		struct net_device *dev
 			= list_entry(list.next, struct net_device, todo_list);
 		list_del(&dev->todo_list);
 
+		(void)set_exec_env(dev->owner_env);
 		switch(dev->reg_state) {
 		case NETREG_REGISTERING:
 			err = netdev_register_sysfs(dev);
-			if (err)
+			if (err) {
 				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
 				       dev->name, err);
+				dev->reg_state = NETREG_REGISTER_ERR;
+				break;
+			}
 			dev->reg_state = NETREG_REGISTERED;
 			break;
 
 		case NETREG_UNREGISTERING:
 			netdev_unregister_sysfs(dev);
+			/* fall through */
+
+		case NETREG_REGISTER_ERR:
 			dev->reg_state = NETREG_UNREGISTERED;
 
 			netdev_wait_allrefs(dev);
 
 			/* paranoia */
+#if 0
 			BUG_ON(atomic_read(&dev->refcnt));
 			BUG_TRAP(!dev->ip_ptr);
 			BUG_TRAP(!dev->ip6_ptr);
 			BUG_TRAP(!dev->dn_ptr);
+#endif
 
+			put_beancounter(netdev_bc(dev)->exec_ub);
+			put_beancounter(netdev_bc(dev)->owner_ub);
+			netdev_bc(dev)->exec_ub = NULL;
+			netdev_bc(dev)->owner_ub = NULL;
 
 			/* It must be the very last action, 
 			 * after this 'dev' may point to freed up memory.
@@ -3015,6 +3229,7 @@ void netdev_run_todo(void)
 			break;
 		}
 	}
+	(void)set_exec_env(current_env);
 
 out:
 	up(&net_todo_run_mutex);
@@ -3030,6 +3245,12 @@ out:
  */
 void free_netdev(struct net_device *dev)
 {
+	if (dev->is_leaked) {
+		printk(KERN_EMERG "free_netdev: device %s=%p leaked CT=%u\n",
+		       dev->name, dev, VEID(get_exec_env()));
+		return;
+	}
+
 #ifdef CONFIG_SYSFS
 	/*  Compatiablity with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
@@ -3081,7 +3302,8 @@ int unregister_netdevice(struct net_devi
 		return -ENODEV;
 	}
 
-	BUG_ON(dev->reg_state != NETREG_REGISTERED);
+	BUG_ON(dev->reg_state != NETREG_REGISTERED &&
+	       dev->reg_state != NETREG_REGISTER_ERR);
 
 	/* If device is running, close it first. */
 	if (dev->flags & IFF_UP)
@@ -3097,6 +3319,8 @@ int unregister_netdevice(struct net_devi
 				dev_tail = dp;
 			*dp = d->next;
 			write_unlock_bh(&dev_base_lock);
+			if (ve_is_super(get_exec_env()))
+				list_del(&dev->dev_global_list_entry);
 			break;
 		}
 	}
@@ -3106,7 +3330,8 @@ int unregister_netdevice(struct net_devi
 		return -ENODEV;
 	}
 
-	dev->reg_state = NETREG_UNREGISTERING;
+	if (dev->reg_state != NETREG_REGISTER_ERR)
+		dev->reg_state = NETREG_UNREGISTERING;
 
 	synchronize_net();
 
@@ -3130,6 +3355,8 @@ int unregister_netdevice(struct net_devi
 	/* Notifier chain MUST detach us from master device. */
 	BUG_TRAP(!dev->master);
 
+	dev_free_index(dev);
+
 	free_divert_blk(dev);
 
 	/* Finish processing unregister after unlock */
@@ -3275,6 +3502,9 @@ EXPORT_SYMBOL(dev_get_by_flags);
 EXPORT_SYMBOL(dev_get_by_index);
 EXPORT_SYMBOL(dev_get_by_name);
 EXPORT_SYMBOL(dev_ioctl);
+EXPORT_SYMBOL(dev_new_index);
+EXPORT_SYMBOL(dev_name_hash);
+EXPORT_SYMBOL(dev_index_hash);
 EXPORT_SYMBOL(dev_open);
 EXPORT_SYMBOL(dev_queue_xmit);
 EXPORT_SYMBOL(dev_remove_pack);
diff -Nurap linux-2.6.9-100.orig/net/core/dev_mcast.c linux-2.6.9-ve023stab054/net/core/dev_mcast.c
--- linux-2.6.9-100.orig/net/core/dev_mcast.c	2011-06-09 19:22:44.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/dev_mcast.c	2011-06-15 19:26:19.000000000 +0400
@@ -310,3 +310,4 @@ void __init dev_mcast_init(void)
 EXPORT_SYMBOL(dev_mc_add);
 EXPORT_SYMBOL(dev_mc_delete);
 EXPORT_SYMBOL(dev_mc_upload);
+EXPORT_SYMBOL(dev_mc_discard);
diff -Nurap linux-2.6.9-100.orig/net/core/dst.c linux-2.6.9-ve023stab054/net/core/dst.c
--- linux-2.6.9-100.orig/net/core/dst.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/dst.c	2011-06-15 19:26:19.000000000 +0400
@@ -45,6 +45,7 @@ static struct timer_list dst_gc_timer =
 static void dst_run_gc(unsigned long dummy)
 {
 	int    delayed = 0;
+	int    work_performed;
 	struct dst_entry * dst, **dstp;
 
 	if (!spin_trylock(&dst_lock)) {
@@ -52,9 +53,9 @@ static void dst_run_gc(unsigned long dum
 		return;
 	}
 
-
 	del_timer(&dst_gc_timer);
 	dstp = &dst_garbage_list;
+	work_performed = 0;
 	while ((dst = *dstp) != NULL) {
 		if (atomic_read(&dst->__refcnt)) {
 			dstp = &dst->next;
@@ -62,6 +63,7 @@ static void dst_run_gc(unsigned long dum
 			continue;
 		}
 		*dstp = dst->next;
+		work_performed = 1;
 
 		dst = dst_destroy(dst);
 		if (dst) {
@@ -86,15 +88,19 @@ static void dst_run_gc(unsigned long dum
 		dst_gc_timer_inc = DST_GC_MAX;
 		goto out;
 	}
-	if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
-		dst_gc_timer_expires = DST_GC_MAX;
-	dst_gc_timer_inc += DST_GC_INC;
-	dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
+	if (!work_performed) {
+		if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
+			dst_gc_timer_expires = DST_GC_MAX;
+		dst_gc_timer_inc += DST_GC_INC;
+	} else {
+		dst_gc_timer_inc = DST_GC_INC;
+		dst_gc_timer_expires = DST_GC_MIN;
+	}
 #if RT_CACHE_DEBUG >= 2
 	printk("dst_total: %d/%d %ld\n",
 	       atomic_read(&dst_total), delayed,  dst_gc_timer_expires);
 #endif
-	add_timer(&dst_gc_timer);
+	mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
 
 out:
 	spin_unlock(&dst_lock);
@@ -229,13 +235,13 @@ static void dst_ifdown(struct dst_entry 
 
 	do {
 		if (unregister) {
-			dst->dev = &loopback_dev;
-			dev_hold(&loopback_dev);
+			dst->dev = &visible_loopback_dev;
+			dev_hold(&visible_loopback_dev);
 			dev_put(dev);
 			if (dst->neighbour && dst->neighbour->dev == dev) {
-				dst->neighbour->dev = &loopback_dev;
+				dst->neighbour->dev = &visible_loopback_dev;
 				dev_put(dev);
-				dev_hold(&loopback_dev);
+				dev_hold(&visible_loopback_dev);
 			}
 		}
 
@@ -253,12 +259,15 @@ static int dst_dev_event(struct notifier
 	switch (event) {
 	case NETDEV_UNREGISTER:
 	case NETDEV_DOWN:
-		spin_lock_bh(&dst_lock);
+		local_bh_disable();
+		dst_run_gc(0);
+		spin_lock(&dst_lock);
 		for (dst = dst_garbage_list; dst; dst = dst->next) {
 			if (dst->dev == dev)
 				dst_ifdown(dst, event != NETDEV_DOWN);
 		}
-		spin_unlock_bh(&dst_lock);
+		spin_unlock(&dst_lock);
+		local_bh_enable();
 		break;
 	}
 	return NOTIFY_DONE;
diff -Nurap linux-2.6.9-100.orig/net/core/dv.c linux-2.6.9-ve023stab054/net/core/dv.c
--- linux-2.6.9-100.orig/net/core/dv.c	2004-10-19 01:54:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/dv.c	2011-06-15 19:26:19.000000000 +0400
@@ -93,6 +93,8 @@ void free_divert_blk(struct net_device *
 	}
 }
 
+EXPORT_SYMBOL(free_divert_blk);
+
 /*
  * Adds a tcp/udp (source or dest) port to an array
  */
diff -Nurap linux-2.6.9-100.orig/net/core/filter.c linux-2.6.9-ve023stab054/net/core/filter.c
--- linux-2.6.9-100.orig/net/core/filter.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/filter.c	2011-06-15 19:26:19.000000000 +0400
@@ -369,7 +369,7 @@ int sk_attach_filter(struct sock_fprog *
         if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
                 return -EINVAL;
 
-	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL_UBC);
 	if (!fp)
 		return -ENOMEM;
 	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
diff -Nurap linux-2.6.9-100.orig/net/core/neighbour.c linux-2.6.9-ve023stab054/net/core/neighbour.c
--- linux-2.6.9-100.orig/net/core/neighbour.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/neighbour.c	2011-06-15 19:26:20.000000000 +0400
@@ -33,6 +33,7 @@
 #include <net/netevent.h>
 #include <linux/rtnetlink.h>
 #include <linux/random.h>
+#include <ub/beancounter.h>
 
 #define NEIGH_DEBUG 1
 
@@ -241,6 +242,7 @@ static struct neighbour *neigh_alloc(str
 	int entries;
 
 	entries = atomic_inc_return(&tbl->entries) - 1;
+	n = ERR_PTR(-ENOBUFS);
 	if (entries >= tbl->gc_thresh3 ||
 	    (entries >= tbl->gc_thresh2 &&
 	     time_after(now, tbl->last_flush + 5 * HZ))) {
@@ -251,7 +253,7 @@ static struct neighbour *neigh_alloc(str
 
 	n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
 	if (!n)
-		goto out_entries;
+		goto out_nomem;
 
 	memset(n, 0, tbl->entry_size);
 
@@ -273,6 +275,8 @@ static struct neighbour *neigh_alloc(str
 out:
 	return n;
 
+out_nomem:
+	n = ERR_PTR(-ENOMEM);
 out_entries:
 	atomic_dec(&tbl->entries);
 	goto out;
@@ -388,12 +392,11 @@ struct neighbour *neigh_create(struct ne
 	u32 hash_val;
 	int key_len = tbl->key_len;
 	int error;
-	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+	struct neighbour *n1, *rc, *n;
 
-	if (!n) {
-		rc = ERR_PTR(-ENOBUFS);
+	rc = n = neigh_alloc(tbl);
+	if (IS_ERR(n))
 		goto out;
-	}
 
 	memcpy(n->primary_key, pkey, key_len);
 	n->dev = dev;
@@ -640,6 +643,8 @@ static void neigh_periodic_timer(unsigne
 	struct neigh_table *tbl = (struct neigh_table *)arg;
 	struct neighbour *n, **np;
 	unsigned long expire, now = jiffies;
+	struct ve_struct *env = set_exec_env(tbl->owner_env);
+	struct user_beancounter *ub = set_exec_ub(tbl->owner_ub);
 
 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
 
@@ -701,6 +706,8 @@ next_elt:
  	mod_timer(&tbl->gc_timer, now + expire);
 
 	write_unlock(&tbl->lock);
+	set_exec_ub(ub);
+	set_exec_env(env);
 }
 
 static __inline__ int neigh_max_probes(struct neighbour *n)
@@ -742,6 +749,11 @@ static void neigh_timer_handler(unsigned
 	struct neighbour *neigh = (struct neighbour *)arg;
 	unsigned state;
 	int notify = 0;
+	struct ve_struct *env;
+	struct user_beancounter *ub;
+
+	env = set_exec_env(neigh->dev->owner_env);
+	ub = set_exec_ub(netdev_bc(neigh->dev)->exec_ub);
 
 	write_lock(&neigh->lock);
 
@@ -831,6 +843,8 @@ out:
 		neigh_app_notify(neigh);
 #endif
 	neigh_release(neigh);
+	(void)set_exec_ub(ub);
+	set_exec_env(env);
 }
 
 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
@@ -1222,6 +1236,9 @@ static void neigh_proxy_process(unsigned
 	unsigned long now = jiffies;
 	struct sk_buff *skb;
 
+	struct ve_struct *env = set_exec_env(tbl->owner_env);
+	struct user_beancounter *ub = set_exec_ub(tbl->owner_ub);
+
 	spin_lock(&tbl->proxy_queue.lock);
 
 	skb = tbl->proxy_queue.next;
@@ -1233,6 +1250,7 @@ static void neigh_proxy_process(unsigned
 		skb = skb->next;
 		if (tdif <= 0) {
 			struct net_device *dev = back->dev;
+
 			__skb_unlink(back, &tbl->proxy_queue);
 			if (tbl->proxy_redo && netif_running(dev))
 				tbl->proxy_redo(back);
@@ -1247,6 +1265,8 @@ static void neigh_proxy_process(unsigned
 	if (sched_next)
 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
 	spin_unlock(&tbl->proxy_queue.lock);
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(env);
 }
 
 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
@@ -1335,12 +1355,15 @@ void neigh_parms_destroy(struct neigh_pa
 }
 
 
-void neigh_table_init(struct neigh_table *tbl)
+int neigh_table_init(struct neigh_table *tbl)
 {
 	unsigned long now = jiffies;
 	unsigned long phsize;
 
 	atomic_set(&tbl->parms.refcnt, 1);
+	atomic_set(&tbl->entries, 0);
+	tbl->hash_chain_gc = 0;
+	tbl->parms.next = NULL;
 	INIT_RCU_HEAD(&tbl->parms.rcu_head);
 	tbl->parms.reachable_time =
 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
@@ -1348,22 +1371,30 @@ void neigh_table_init(struct neigh_table
 	if (!tbl->kmem_cachep)
 		tbl->kmem_cachep = kmem_cache_create(tbl->id,
 						     tbl->entry_size,
-						     0, SLAB_HWCACHE_ALIGN,
+						     0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						     NULL, NULL);
 
 	if (!tbl->kmem_cachep)
-		panic("cannot create neighbour cache");
+		return -ENOMEM;
+
+	tbl->owner_env = get_ve(get_exec_env());
+	tbl->owner_ub = get_beancounter(get_exec_ub());
 
 	tbl->stats = alloc_percpu(struct neigh_statistics);
 	if (!tbl->stats)
-		panic("cannot create neighbour cache statistics");
+		goto out;
 	
 #ifdef CONFIG_PROC_FS
-	tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
-	if (!tbl->pde) 
-		panic("cannot create neighbour proc dir entry");
-	tbl->pde->proc_fops = &neigh_stat_seq_fops;
-	tbl->pde->data = tbl;
+	if (ve_is_super(get_exec_env())) {
+		char name[strlen(tbl->id) + sizeof("net/stat/")];
+		strcpy(name, "net/stat/");
+		strcat(name, tbl->id);
+		tbl->pde = create_proc_glob_entry(name, S_IRUGO, NULL);
+		if (tbl->pde) {
+			tbl->pde->proc_fops = &neigh_stat_seq_fops;
+			tbl->pde->data = tbl;
+		}
+	}
 #endif
 
 	tbl->hash_mask = 1;
@@ -1373,7 +1404,7 @@ void neigh_table_init(struct neigh_table
 	tbl->phash_buckets = kmalloc(phsize, GFP_KERNEL);
 
 	if (!tbl->hash_buckets || !tbl->phash_buckets)
-		panic("cannot allocate neighbour cache hashes");
+		goto nomem;
 
 	memset(tbl->phash_buckets, 0, phsize);
 
@@ -1397,6 +1428,25 @@ void neigh_table_init(struct neigh_table
 	tbl->next	= neigh_tables;
 	neigh_tables	= tbl;
 	write_unlock(&neigh_tbl_lock);
+	return 0;
+
+nomem:
+	if (tbl->hash_buckets) {
+		neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
+		tbl->hash_buckets = NULL;
+	}
+	if (tbl->phash_buckets) {
+		kfree(tbl->phash_buckets);
+		tbl->phash_buckets = NULL;
+	}
+	if (tbl->stats) {
+		free_percpu(tbl->stats);
+		tbl->stats = NULL;
+	}
+out:
+	put_beancounter(tbl->owner_ub);
+	put_ve(tbl->owner_env);
+	return -ENOMEM;
 }
 
 int neigh_table_clear(struct neigh_table *tbl)
@@ -1410,6 +1460,15 @@ int neigh_table_clear(struct neigh_table
 	neigh_ifdown(tbl, NULL);
 	if (atomic_read(&tbl->entries))
 		printk(KERN_CRIT "neighbour leakage\n");
+#ifdef CONFIG_PROC_FS
+	if (ve_is_super(get_exec_env())) {
+		char name[strlen(tbl->id) + sizeof("net/stat/")];
+		strcpy(name, "net/stat/");
+		strcat(name, tbl->id);
+		remove_proc_glob_entry(name, NULL);
+	}
+#endif
+
 	write_lock(&neigh_tbl_lock);
 	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
 		if (*tp == tbl) {
@@ -1425,6 +1484,12 @@ int neigh_table_clear(struct neigh_table
 	kfree(tbl->phash_buckets);
 	tbl->phash_buckets = NULL;
 
+	free_percpu(tbl->stats);
+	tbl->stats = NULL;
+
+	put_beancounter(tbl->owner_ub);
+	put_ve(tbl->owner_env);
+
 	return 0;
 }
 
@@ -1446,6 +1511,8 @@ int neigh_delete(struct sk_buff *skb, st
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
 		read_unlock(&neigh_tbl_lock);
 
 		err = -EINVAL;
@@ -1499,6 +1566,8 @@ int neigh_add(struct sk_buff *skb, struc
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
 		read_unlock(&neigh_tbl_lock);
 
 		err = -EINVAL;
@@ -1642,6 +1711,8 @@ int neigh_dump_info(struct sk_buff *skb,
 	s_t = cb->args[0];
 
 	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
 		if (t < s_t || (family && tbl->family != family))
 			continue;
 		if (t > s_t)
@@ -2217,11 +2288,12 @@ int neigh_sysctl_register(struct net_dev
 			  int p_id, int pdev_id, char *p_name, 
 			  proc_handler *handler)
 {
-	struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	struct neigh_sysctl_table *t;
 	const char *dev_name_source = NULL;
 	char *dev_name = NULL;
 	int err = 0;
 
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (!t)
 		return -ENOBUFS;
 	memcpy(t, &neigh_sysctl_template, sizeof(*t));
diff -Nurap linux-2.6.9-100.orig/net/core/net-sysfs.c linux-2.6.9-ve023stab054/net/core/net-sysfs.c
--- linux-2.6.9-100.orig/net/core/net-sysfs.c	2004-10-19 01:55:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/net-sysfs.c	2011-06-15 19:26:19.000000000 +0400
@@ -370,18 +370,26 @@ static void netdev_release(struct class_
 	struct net_device *dev 
 		= container_of(cd, struct net_device, class_dev);
 
-	BUG_ON(dev->reg_state != NETREG_RELEASED);
+	BUG_ON(dev->reg_state != NETREG_RELEASED &&
+	       dev->reg_state != NETREG_REGISTERING);
 
 	kfree((char *)dev - dev->padded);
 }
 
-static struct class net_class = {
+struct class net_class = {
 	.name = "net",
 	.release = netdev_release,
 #ifdef CONFIG_HOTPLUG
 	.hotplug = netdev_hotplug,
 #endif
 };
+EXPORT_SYMBOL(net_class);
+
+#ifndef CONFIG_VE
+#define visible_net_class net_class
+#else
+#define visible_net_class (*get_exec_env()->net_class)
+#endif
 
 void netdev_unregister_sysfs(struct net_device * net)
 {
@@ -406,7 +414,7 @@ int netdev_register_sysfs(struct net_dev
 	struct class_device_attribute *attr;
 	int ret;
 
-	class_dev->class = &net_class;
+	class_dev->class = &visible_net_class;
 	class_dev->class_data = net;
 
 	strlcpy(class_dev->class_id, net->name, BUS_ID_SIZE);
@@ -439,12 +447,21 @@ out_cleanup:
 out_unreg:
 	printk(KERN_WARNING "%s: sysfs attribute registration failed %d\n",
 	       net->name, ret);
-	class_device_unregister(class_dev);
+	/* put is called in free_netdev() */
+	class_device_del(class_dev);
 out:
 	return ret;
 }
 
+void prepare_sysfs_netdev(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->net_class = &net_class;
+#endif
+}
+
 int netdev_sysfs_init(void)
 {
+	prepare_sysfs_netdev();
 	return class_register(&net_class);
 }
diff -Nurap linux-2.6.9-100.orig/net/core/netfilter.c linux-2.6.9-ve023stab054/net/core/netfilter.c
--- linux-2.6.9-100.orig/net/core/netfilter.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/netfilter.c	2011-06-15 19:26:19.000000000 +0400
@@ -49,6 +49,13 @@ struct list_head nf_hooks[NPROTO][NF_MAX
 static LIST_HEAD(nf_sockopts);
 static spinlock_t nf_hook_lock = SPIN_LOCK_UNLOCKED;
 
+#ifdef CONFIG_VE_IPTABLES
+#define ve_nf_hooks \
+	((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks))
+#else
+#define ve_nf_hooks nf_hooks
+#endif
+
 /* 
  * A queue handler may be registered for each protocol.  Each is protected by
  * long term mutex.  The handler must provide an an outfn() to accept packets
@@ -71,7 +78,7 @@ int nf_register_hook(struct nf_hook_ops 
 	struct list_head *i;
 
 	spin_lock_bh(&nf_hook_lock);
-	list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
+	list_for_each(i, &ve_nf_hooks[reg->pf][reg->hooknum]) {
 		if (reg->priority < ((struct nf_hook_ops *)i)->priority)
 			break;
 	}
@@ -82,6 +89,32 @@ int nf_register_hook(struct nf_hook_ops 
 	return 0;
 }
 
+int visible_nf_register_hook(struct nf_hook_ops *reg)
+{
+	int ret = 0;
+
+	if (!ve_is_super(get_exec_env())) {
+		struct nf_hook_ops *tmp;
+		ret = -ENOMEM;
+		tmp = kmalloc(sizeof(struct nf_hook_ops), GFP_KERNEL);
+		if (!tmp)
+			goto nomem;
+		memcpy(tmp, reg, sizeof(struct nf_hook_ops));
+		reg =  tmp;
+	}
+
+	ret = nf_register_hook(reg);
+	if (ret)
+		goto out;
+
+	return 0;
+out:
+	if (!ve_is_super(get_exec_env()))
+		kfree(reg);
+nomem:
+	return ret;
+}
+
 void nf_unregister_hook(struct nf_hook_ops *reg)
 {
 	spin_lock_bh(&nf_hook_lock);
@@ -91,6 +124,28 @@ void nf_unregister_hook(struct nf_hook_o
 	synchronize_net();
 }
 
+int visible_nf_unregister_hook(struct nf_hook_ops *reg)
+{
+	struct nf_hook_ops *i;
+
+	spin_lock_bh(&nf_hook_lock);
+	list_for_each_entry(i, &ve_nf_hooks[reg->pf][reg->hooknum], list) {
+		if (reg->hook == i->hook) {
+			reg = i;
+			break;
+		}
+	}
+	spin_unlock_bh(&nf_hook_lock);
+	if (reg != i)
+		return -ENOENT;
+
+	nf_unregister_hook(reg);
+
+	if (!ve_is_super(get_exec_env()))
+		kfree(reg);
+	return 0;	
+}
+
 /* Do exclusive ranges overlap? */
 static inline int overlap(int min1, int max1, int min2, int max2)
 {
@@ -314,6 +369,12 @@ static int nf_sockopt(struct sock *sk, i
 	struct nf_sockopt_ops *ops;
 	int ret;
 
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_nf_hooks || 
+	    !get_exec_env()->_ipt_standard_target)
+		return -ENOPROTOOPT;
+#endif
+
 	if (down_interruptible(&nf_sockopt_mutex) != 0)
 		return -EINTR;
 
@@ -542,9 +603,9 @@ int nf_hook_slow(int pf, unsigned int ho
 	skb->nf_debug |= (1 << hook);
 #endif
 
-	elem = &nf_hooks[pf][hook];
+	elem = &ve_nf_hooks[pf][hook];
  next_hook:
-	verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
+	verdict = nf_iterate(&ve_nf_hooks[pf][hook], &skb, hook, indev,
 			     outdev, &elem, okfn, hook_thresh);
 	if (verdict == NF_QUEUE) {
 		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
@@ -590,12 +651,12 @@ void nf_reinject(struct sk_buff *skb, st
 	/* Drop reference to owner of hook which queued us. */
 	module_put(info->elem->owner);
 
-	list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
+	list_for_each_rcu(i, &ve_nf_hooks[info->pf][info->hook]) {
 		if (i == elem) 
   			break;
   	}
   
-	if (elem == &nf_hooks[info->pf][info->hook]) {
+	if (elem == &ve_nf_hooks[info->pf][info->hook]) {
 		/* The module which sent it to userspace is gone. */
 		NFDEBUG("%s: module disappeared, dropping packet.\n",
 			__FUNCTION__);
@@ -610,7 +671,7 @@ void nf_reinject(struct sk_buff *skb, st
 
 	if (verdict == NF_ACCEPT) {
 	next_hook:
-		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
+		verdict = nf_iterate(&ve_nf_hooks[info->pf][info->hook],
 				     &skb, info->hook, 
 				     info->indev, info->outdev, &elem,
 				     info->okfn, INT_MIN);
@@ -835,27 +896,70 @@ EXPORT_SYMBOL(nf_log_packet);
    with it. */
 void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
 
-void __init netfilter_init(void)
+void init_nf_hooks(struct list_head (*nh)[NF_MAX_HOOKS])
 {
 	int i, h;
 
 	for (i = 0; i < NPROTO; i++) {
 		for (h = 0; h < NF_MAX_HOOKS; h++)
-			INIT_LIST_HEAD(&nf_hooks[i][h]);
+			INIT_LIST_HEAD(&nh[i][h]);
 	}
 }
 
+int init_netfilter(void)
+{
+#ifdef CONFIG_VE_IPTABLES
+	struct ve_struct *envid;
+
+	envid = get_exec_env();
+	envid->_nf_hooks = kmalloc(sizeof(nf_hooks), GFP_KERNEL);
+	if (envid->_nf_hooks == NULL)
+		return -ENOMEM;
+
+	/* FIXME: charge ubc */
+
+	init_nf_hooks(envid->_nf_hooks);
+	return 0;
+#else
+	init_nf_hooks(nf_hooks);
+	return 0;
+#endif
+}
+
+#ifdef CONFIG_VE_IPTABLES
+void fini_netfilter(void)
+{
+	struct ve_struct *envid;
+
+	envid = get_exec_env();
+	if (envid->_nf_hooks != NULL)
+		kfree(envid->_nf_hooks);
+	envid->_nf_hooks = NULL;
+
+	/* FIXME: uncharge ubc */
+}
+#endif
+
+void __init netfilter_init(void)
+{
+	init_netfilter();
+}
+
 EXPORT_SYMBOL(ip_ct_attach);
 EXPORT_SYMBOL(ip_route_me_harder);
 EXPORT_SYMBOL(nf_getsockopt);
 EXPORT_SYMBOL(nf_hook_slow);
 EXPORT_SYMBOL(nf_hooks);
 EXPORT_SYMBOL(nf_register_hook);
+EXPORT_SYMBOL(visible_nf_register_hook);
 EXPORT_SYMBOL(nf_register_queue_handler);
 EXPORT_SYMBOL(nf_register_sockopt);
 EXPORT_SYMBOL(nf_register_sockopt_owner);
 EXPORT_SYMBOL(nf_reinject);
 EXPORT_SYMBOL(nf_setsockopt);
 EXPORT_SYMBOL(nf_unregister_hook);
+EXPORT_SYMBOL(visible_nf_unregister_hook);
 EXPORT_SYMBOL(nf_unregister_queue_handler);
 EXPORT_SYMBOL(nf_unregister_sockopt);
+EXPORT_SYMBOL(init_netfilter);
+EXPORT_SYMBOL(fini_netfilter);
diff -Nurap linux-2.6.9-100.orig/net/core/rtnetlink.c linux-2.6.9-ve023stab054/net/core/rtnetlink.c
--- linux-2.6.9-100.orig/net/core/rtnetlink.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/rtnetlink.c	2011-06-15 19:26:22.000000000 +0400
@@ -401,6 +401,8 @@ static int rtnetlink_dump_all(struct sk_
 		if (rtnetlink_links[idx] == NULL ||
 		    rtnetlink_links[idx][type].dumpit == NULL)
 			continue;
+		if (vz_security_proto_check(idx, 0, 0))
+			continue;
 		if (idx > s_idx)
 			memset(&cb->args[0], 0, sizeof(cb->args));
 		if (rtnetlink_links[idx][type].dumpit(skb, cb))
@@ -473,7 +475,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, s
 		return 0;
 
 	family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
-	if (family >= NPROTO) {
+	if (family >= NPROTO || vz_security_proto_check(family, 0, 0)) {
 		*errp = -EAFNOSUPPORT;
 		return -1;
 	}
@@ -535,9 +537,12 @@ rtnetlink_rcv_msg(struct sk_buff *skb, s
 
 	if (link->doit == NULL)
 		link = &(rtnetlink_links[PF_UNSPEC][type]);
-	if (link->doit == NULL)
-		goto err_inval;
-	err = link->doit(skb, nlh, (void *)&rta_buf[0]);
+	/* SUSE 11 compat kludge here */
+	err = -EINVAL;
+	if (nlh->nlmsg_type == RTM_NEWLINK)
+		err = -EOPNOTSUPP;
+	if (link->doit != NULL)
+		err = link->doit(skb, nlh, (void *)&rta_buf[0]);
 
 	*errp = err;
 	return err;
@@ -599,7 +604,13 @@ static void rtnetlink_rcv(struct sock *s
 			return;
 
 		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
-			if (rtnetlink_rcv_skb(skb)) {
+			int ret;
+			struct ve_struct *old_env;
+
+			old_env = set_exec_env(VE_OWNER_SKB(skb));
+			ret = rtnetlink_rcv_skb(skb);
+			(void)set_exec_env(old_env);
+			if (ret) {
 				if (skb->len)
 					skb_queue_head(&sk->sk_receive_queue,
 						       skb);
diff -Nurap linux-2.6.9-100.orig/net/core/scm.c linux-2.6.9-ve023stab054/net/core/scm.c
--- linux-2.6.9-100.orig/net/core/scm.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/scm.c	2011-06-15 19:26:19.000000000 +0400
@@ -34,6 +34,7 @@
 #include <net/compat.h>
 #include <net/scm.h>
 
+#include <ub/ub_mem.h>
 
 /*
  *	Only allow a user to send credentials, that they could set with 
@@ -42,7 +43,9 @@
 
 static __inline__ int scm_check_creds(struct ucred *creds)
 {
-	if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
+	if ((creds->pid == virt_tgid(current) ||
+	     creds->pid == current->tgid ||
+	     capable(CAP_VE_SYS_ADMIN)) &&
 	    ((creds->uid == current->uid || creds->uid == current->euid ||
 	      creds->uid == current->suid) || capable(CAP_SETUID)) &&
 	    ((creds->gid == current->gid || creds->gid == current->egid ||
@@ -69,7 +72,7 @@ static int scm_fp_copy(struct cmsghdr *c
 
 	if (!fpl)
 	{
-		fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+		fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
 		if (!fpl)
 			return -ENOMEM;
 		*fplp = fpl;
@@ -292,7 +295,7 @@ struct scm_fp_list *scm_fp_dup(struct sc
 	if (!fpl)
 		return NULL;
 
-	new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+	new_fpl = ub_kmalloc(sizeof(*fpl), GFP_KERNEL);
 	if (new_fpl) {
 		INIT_LIST_HEAD(&new_fpl->list);
 		for (i=fpl->count-1; i>=0; i--)
diff -Nurap linux-2.6.9-100.orig/net/core/skbuff.c linux-2.6.9-ve023stab054/net/core/skbuff.c
--- linux-2.6.9-100.orig/net/core/skbuff.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/skbuff.c	2011-06-15 19:26:19.000000000 +0400
@@ -48,6 +48,7 @@
 #include <linux/in.h>
 #include <linux/inet.h>
 #include <linux/slab.h>
+#include <linux/kmem_cache.h>
 #include <linux/netdevice.h>
 #ifdef CONFIG_NET_CLS_ACT
 #include <net/pkt_sched.h>
@@ -68,6 +69,8 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
+#include <ub/ub_net.h>
+
 static kmem_cache_t *skbuff_head_cache;
 
 /*
@@ -137,6 +140,9 @@ struct sk_buff *alloc_skb(unsigned int s
 	if (!skb)
 		goto out;
 
+	if (ub_skb_alloc_bc(skb, gfp_mask))
+		goto nobc;
+
 	/* Get the DATA. Size must match skb_add_mtu(). */
 	size = SKB_DATA_ALIGN(size);
 	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
@@ -150,6 +156,7 @@ struct sk_buff *alloc_skb(unsigned int s
 	skb->data = data;
 	skb->tail = data;
 	skb->end  = data + size;
+	SET_VE_OWNER_SKB(skb, get_exec_env());
 
 	atomic_set(&(skb_shinfo(skb)->dataref), 1);
 	skb_shinfo(skb)->nr_frags  = 0;
@@ -159,6 +166,8 @@ struct sk_buff *alloc_skb(unsigned int s
 out:
 	return skb;
 nodata:
+	ub_skb_free_bc(skb);
+nobc:
 	kmem_cache_free(skbuff_head_cache, skb);
 	skb = NULL;
 	goto out;
@@ -278,6 +287,7 @@ void skb_release_data(struct sk_buff *sk
 void kfree_skbmem(struct sk_buff *skb)
 {
 	skb_release_data(skb);
+	ub_skb_free_bc(skb);
 	kmem_cache_free(skbuff_head_cache, skb);
 }
 
@@ -302,6 +312,7 @@ void __kfree_skb(struct sk_buff *skb)
 #ifdef CONFIG_XFRM
 	secpath_put(skb->sp);
 #endif
+	ub_skb_uncharge(skb);
 	if(skb->destructor) {
 		if (in_irq())
 			printk(KERN_WARNING "Warning: kfree_skb on "
@@ -347,6 +358,11 @@ struct sk_buff *skb_clone(struct sk_buff
 	if (!n) 
 		return NULL;
 
+	if (ub_skb_alloc_bc(n, gfp_mask)) {
+		kmem_cache_free(skbuff_head_cache, n);
+		return NULL;
+	}
+
 #define C(x) n->x = skb->x
 
 	n->next = n->prev = NULL;
@@ -379,6 +395,7 @@ struct sk_buff *skb_clone(struct sk_buff
 	C(priority);
 	C(protocol);
 	C(security);
+	SET_VE_OWNER_SKB(n, VE_OWNER_SKB(skb));
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	C(nfmark);
@@ -447,6 +464,7 @@ static void copy_skb_header(struct sk_bu
 	new->stamp	= old->stamp;
 	new->destructor = NULL;
 	new->security	= old->security;
+	SET_VE_OWNER_SKB(new, VE_OWNER_SKB((struct sk_buff *)old));
 #ifdef CONFIG_NETFILTER
 	new->nfmark	= old->nfmark;
 	new->nfcache	= old->nfcache;
@@ -1573,6 +1591,7 @@ void __init skb_init(void)
 					      NULL, NULL);
 	if (!skbuff_head_cache)
 		panic("cannot create skbuff cache");
+	skbuff_head_cache->flags |= CFLGS_ENVIDS;
 }
 
 EXPORT_SYMBOL(___pskb_trim);
diff -Nurap linux-2.6.9-100.orig/net/core/sock.c linux-2.6.9-ve023stab054/net/core/sock.c
--- linux-2.6.9-100.orig/net/core/sock.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/sock.c	2011-06-15 19:26:19.000000000 +0400
@@ -106,6 +106,7 @@
 #include <linux/net.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/kmem_cache.h>
 #include <linux/interrupt.h>
 #include <linux/poll.h>
 #include <linux/tcp.h>
@@ -121,6 +122,9 @@
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
 
+#include <ub/ub_net.h>
+#include <ub/beancounter.h>
+
 #include <linux/filter.h>
 
 #ifdef CONFIG_INET
@@ -169,7 +173,7 @@ static void sock_warn_obsolete_bsdism(co
 	static char warncomm[16];
 	if (strcmp(warncomm, current->comm) && warned < 5) { 
 		strcpy(warncomm,  current->comm); 
-		printk(KERN_WARNING "process `%s' is using obsolete "
+		ve_printk(VE_LOG, KERN_WARNING "process `%s' is using obsolete "
 		       "%s SO_BSDCOMPAT\n", warncomm, name);
 		warned++;
 	}
@@ -633,6 +637,7 @@ struct sock *sk_alloc(int family, int pr
 			       zero_it == 1 ? sizeof(struct sock) : zero_it);
 			sk->sk_family = family;
 			sock_lock_init(sk);
+			SET_VE_OWNER_SK(sk, get_exec_env());
 		}
 		sk->sk_slab = slab;
 		
@@ -665,6 +670,7 @@ void sk_free(struct sock *sk)
 		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
 
 	security_sk_free(sk);
+	ub_sock_uncharge(sk);
 	kmem_cache_free(sk->sk_slab, sk);
 	module_put(owner);
 }
@@ -675,6 +681,7 @@ void __init sk_init(void)
 				      SLAB_HWCACHE_ALIGN, NULL, NULL);
 	if (!sk_cachep)
 		printk(KERN_CRIT "sk_init: Cannot create sock SLAB cache!");
+	sk_cachep->flags |= CFLGS_ENVIDS;
 
 	if (num_physpages <= 4096) {
 		sysctl_wmem_max = 32767;
@@ -831,6 +838,7 @@ static long sock_wait_for_wmem(struct so
 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
 				     unsigned long data_len, int noblock, int *errcode)
 {
+#if 0
 	struct sk_buff *skb;
 	unsigned int gfp_mask;
 	long timeo;
@@ -907,13 +915,87 @@ interrupted:
 	err = sock_intr_errno(timeo);
 failure:
 	*errcode = err;
+#endif
+	return NULL;
+}
+
+struct sk_buff *sock_alloc_send_skb2(struct sock *sk, unsigned long size,
+				     unsigned long size2, int noblock,
+				     int *errcode)
+{
+	struct sk_buff *skb;
+	unsigned int gfp_mask;
+	long timeo;
+	int err;
+
+	gfp_mask = sk->sk_allocation;
+	if (gfp_mask & __GFP_WAIT)
+		gfp_mask |= __GFP_REPEAT;
+
+	timeo = sock_sndtimeo(sk, noblock);
+	while (1) {
+		err = sock_error(sk);
+		if (err != 0)
+			goto failure;
+
+		err = -EPIPE;
+		if (sk->sk_shutdown & SEND_SHUTDOWN)
+			goto failure;
+
+		if (ub_sock_getwres_other(sk, skb_charge_size(size))) {
+			if (size2 < size) {
+				size = size2;
+				continue;
+			}
+			set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+			err = -EAGAIN;
+			if (!timeo)
+				goto failure;
+			if (signal_pending(current))
+				goto interrupted;
+			timeo = ub_sock_wait_for_space(sk, timeo,
+					skb_charge_size(size));
+			continue;
+		}
+
+		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+			skb = alloc_skb(size, sk->sk_allocation);
+			if (skb)
+				/* Full success... */
+				break;
+			ub_sock_retwres_other(sk, skb_charge_size(size),
+					SOCK_MIN_UBCSPACE_CH);
+			err = -ENOBUFS;
+			goto failure;
+		}
+		ub_sock_retwres_other(sk,
+				skb_charge_size(size),
+				SOCK_MIN_UBCSPACE_CH);
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		err = -EAGAIN;
+		if (!timeo)
+			goto failure;
+		if (signal_pending(current))
+			goto interrupted;
+		timeo = sock_wait_for_wmem(sk, timeo);
+	}
+
+	ub_skb_set_charge(skb, sk, skb_charge_size(size), UB_OTHERSOCKBUF);
+	skb_set_owner_w(skb, sk);
+	return skb;
+
+interrupted:
+	err = sock_intr_errno(timeo);
+failure:
+	*errcode = err;
 	return NULL;
 }
 
 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 
 				    int noblock, int *errcode)
 {
-	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
+	return sock_alloc_send_skb2(sk, size, size, noblock, errcode);
 }
 
 static void __lock_sock(struct sock *sk)
diff -Nurap linux-2.6.9-100.orig/net/core/stream.c linux-2.6.9-ve023stab054/net/core/stream.c
--- linux-2.6.9-100.orig/net/core/stream.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/stream.c	2011-06-15 19:26:22.000000000 +0400
@@ -109,8 +109,10 @@ EXPORT_SYMBOL(sk_stream_wait_close);
  * sk_stream_wait_memory - Wait for more memory for a socket
  * @sk - socket to wait for memory
  * @timeo_p - for how long
+ * @amount - amount of memory to wait for (in UB space!)
  */
-int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+int __sk_stream_wait_memory(struct sock *sk, long *timeo_p,
+		unsigned long amount)
 {
 	int err = 0;
 	long vm_wait = 0;
@@ -132,7 +134,10 @@ int sk_stream_wait_memory(struct sock *s
 		if (signal_pending(current))
 			goto do_interrupted;
 		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
-		if (sk_stream_memory_free(sk) && !vm_wait)
+		if (amount == 0) {
+			if (sk_stream_memory_free(sk) && !vm_wait)
+				break;
+		} else if (!ub_sock_sndqueueadd_tcp(sk, amount))
 			break;
 
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
@@ -140,6 +145,8 @@ int sk_stream_wait_memory(struct sock *s
 		sk_wait_event(sk, &current_timeo, sk_stream_memory_free(sk) &&
 						  vm_wait);
 		sk->sk_write_pending--;
+		if (amount > 0)
+			ub_sock_sndqueuedel(sk);
 
 		if (vm_wait) {
 			vm_wait -= current_timeo;
@@ -166,6 +173,10 @@ do_interrupted:
 	goto out;
 }
 
+int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+{
+	return __sk_stream_wait_memory(sk, timeo_p, 0);
+}
 EXPORT_SYMBOL(sk_stream_wait_memory);
 
 void sk_stream_rfree(struct sk_buff *skb)
diff -Nurap linux-2.6.9-100.orig/net/core/utils.c linux-2.6.9-ve023stab054/net/core/utils.c
--- linux-2.6.9-100.orig/net/core/utils.c	2011-06-09 19:23:05.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/core/utils.c	2011-06-15 19:26:18.000000000 +0400
@@ -181,7 +181,7 @@ void * __init net_alloc_hash(const char 
 	/* allow the kernel cmdline to have a say */
 	if (!numentries) {
 		/* round applicable memory size up to nearest megabyte */
-		numentries = nr_all_pages;
+		numentries = nr_kernel_pages;
 		numentries += (1UL << (20 - PAGE_SHIFT)) - 1;
 		numentries >>= 20 - PAGE_SHIFT;
 		numentries <<= 20 - PAGE_SHIFT;
diff -Nurap linux-2.6.9-100.orig/net/ipv4/af_inet.c linux-2.6.9-ve023stab054/net/ipv4/af_inet.c
--- linux-2.6.9-100.orig/net/ipv4/af_inet.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/af_inet.c	2011-06-15 19:26:19.000000000 +0400
@@ -113,6 +113,8 @@
 #include <linux/mroute.h>
 #endif
 
+#include <ub/ub_net.h>
+
 DEFINE_SNMP_STAT(struct linux_mib, net_statistics);
 
 #ifdef INET_REFCNT_DEBUG
@@ -287,6 +289,13 @@ static int inet_create(struct socket *so
 	if (sk == NULL)
 		goto out;
 
+	err = -ENOBUFS;
+	if (ub_sock_charge(sk, PF_INET, sock->type))
+		goto out_sk_free;
+	/* if charge was successful, sock_init_data() MUST be called to
+	 * set sk->sk_type. otherwise sk will be uncharged to wrong resource
+	 */
+
 	err = 0;
 	sk->sk_prot = answer_prot;
 	sk->sk_no_check = answer_no_check;
@@ -344,6 +353,8 @@ static int inet_create(struct socket *so
 	}
 out:
 	return err;
+out_sk_free:
+	sk_free(sk);
 out_rcu_unlock:
 	rcu_read_unlock();
 	goto out;
@@ -361,6 +372,9 @@ int inet_release(struct socket *sock)
 
 	if (sk) {
 		long timeout;
+		struct ve_struct *saved_env;
+
+		saved_env = set_exec_env(VE_OWNER_SK(sk));
 
 		/* Applications forget to leave groups before exiting */
 		ip_mc_drop_socket(sk);
@@ -378,6 +392,8 @@ int inet_release(struct socket *sock)
 			timeout = sk->sk_lingertime;
 		sock->sk = NULL;
 		sk->sk_prot->close(sk, timeout);
+
+		set_exec_env(saved_env);
 	}
 	return 0;
 }
@@ -990,20 +1006,20 @@ static struct net_protocol icmp_protocol
 
 static int __init init_ipv4_mibs(void)
 {
-	net_statistics[0] = alloc_percpu(struct linux_mib);
-	net_statistics[1] = alloc_percpu(struct linux_mib);
-	ip_statistics[0] = alloc_percpu(struct ipstats_mib);
-	ip_statistics[1] = alloc_percpu(struct ipstats_mib);
-	icmp_statistics[0] = alloc_percpu(struct icmp_mib);
-	icmp_statistics[1] = alloc_percpu(struct icmp_mib);
-	tcp_statistics[0] = alloc_percpu(struct tcp_mib);
-	tcp_statistics[1] = alloc_percpu(struct tcp_mib);
-	udp_statistics[0] = alloc_percpu(struct udp_mib);
-	udp_statistics[1] = alloc_percpu(struct udp_mib);
+	ve_net_statistics[0] = alloc_percpu(struct linux_mib);
+	ve_net_statistics[1] = alloc_percpu(struct linux_mib);
+	ve_ip_statistics[0] = alloc_percpu(struct ipstats_mib);
+	ve_ip_statistics[1] = alloc_percpu(struct ipstats_mib);
+	ve_icmp_statistics[0] = alloc_percpu(struct icmp_mib);
+	ve_icmp_statistics[1] = alloc_percpu(struct icmp_mib);
+	ve_tcp_statistics[0] = alloc_percpu(struct tcp_mib);
+	ve_tcp_statistics[1] = alloc_percpu(struct tcp_mib);
+	ve_udp_statistics[0] = alloc_percpu(struct udp_mib);
+	ve_udp_statistics[1] = alloc_percpu(struct udp_mib);
 	if (!
-	    (net_statistics[0] && net_statistics[1] && ip_statistics[0]
-	     && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
-	     && udp_statistics[0] && udp_statistics[1]))
+	    (ve_net_statistics[0] && ve_net_statistics[1] && ve_ip_statistics[0]
+	     && ve_ip_statistics[1] && ve_icmp_statistics[0] && ve_icmp_statistics[1]
+	     && ve_tcp_statistics[0] && ve_tcp_statistics[1] && ve_udp_statistics[0] && ve_udp_statistics[1]))
 		return -ENOMEM;
 
 	(void) tcp_mib_init();
diff -Nurap linux-2.6.9-100.orig/net/ipv4/arp.c linux-2.6.9-ve023stab054/net/ipv4/arp.c
--- linux-2.6.9-100.orig/net/ipv4/arp.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/arp.c	2011-06-15 19:26:20.000000000 +0400
@@ -173,7 +173,7 @@ struct neigh_ops arp_broken_ops = {
 	.queue_xmit =		dev_queue_xmit,
 };
 
-struct neigh_table arp_tbl = {
+struct neigh_table global_arp_tbl = {
 	.family =	AF_INET,
 	.entry_size =	sizeof(struct neighbour) + 4,
 	.key_len =	4,
@@ -182,7 +182,7 @@ struct neigh_table arp_tbl = {
 	.proxy_redo =	parp_redo,
 	.id =		"arp_cache",
 	.parms = {
-		.tbl =			&arp_tbl,
+		.tbl =			&global_arp_tbl,
 		.base_reachable_time =	30 * HZ,
 		.retrans_time =	1 * HZ,
 		.gc_staletime =	60 * HZ,
@@ -985,7 +985,7 @@ int arp_req_set(struct arpreq *r, struct
 			return 0;
 		}
 		if (dev == NULL) {
-			ipv4_devconf.proxy_arp = 1;
+			ve_ipv4_devconf.proxy_arp = 1;
 			return 0;
 		}
 		if (__in_dev_get(dev)) {
@@ -1091,7 +1091,7 @@ int arp_req_delete(struct arpreq *r, str
 			return pneigh_delete(&arp_tbl, &ip, dev);
 		if (mask == 0) {
 			if (dev == NULL) {
-				ipv4_devconf.proxy_arp = 0;
+				ve_ipv4_devconf.proxy_arp = 0;
 				return 0;
 			}
 			if (__in_dev_get(dev)) {
@@ -1237,7 +1237,9 @@ static int arp_proc_init(void);
 
 void __init arp_init(void)
 {
-	neigh_table_init(&arp_tbl);
+	get_ve0()->ve_arp_tbl = &global_arp_tbl;
+	if (neigh_table_init(&arp_tbl))
+		panic("cannot initialize ARP tables\n");
 
 	dev_add_pack(&arp_packet_type);
 	arp_proc_init();
@@ -1369,8 +1371,9 @@ static int arp_seq_open(struct inode *in
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
-       
+	struct neigh_seq_state *s;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		goto out;
 
@@ -1418,8 +1421,53 @@ EXPORT_SYMBOL(arp_rcv);
 EXPORT_SYMBOL(arp_create);
 EXPORT_SYMBOL(arp_xmit);
 EXPORT_SYMBOL(arp_send);
-EXPORT_SYMBOL(arp_tbl);
+EXPORT_SYMBOL(global_arp_tbl);
 
 #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
 EXPORT_SYMBOL(clip_tbl_hook);
 #endif
+
+int ve_arp_init(struct ve_struct *ve)
+{
+	struct ve_struct *old_env;
+	int err;
+
+	ve->ve_arp_tbl = kmalloc(sizeof(struct neigh_table), GFP_KERNEL);
+	if (ve->ve_arp_tbl == NULL)
+		return -ENOMEM;
+
+	*(ve->ve_arp_tbl) = global_arp_tbl;
+	ve->ve_arp_tbl->parms.tbl = ve->ve_arp_tbl;
+	old_env = set_exec_env(ve);
+	err = neigh_table_init(ve->ve_arp_tbl);
+	if (err)
+		goto out_free;
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
+			      NET_IPV4_NEIGH, "ipv4", NULL);
+#endif
+	err = 0;
+
+out:
+	set_exec_env(old_env);
+	return err;
+
+out_free:
+	kfree(ve->ve_arp_tbl);
+	ve->ve_arp_tbl = NULL;
+	goto out;
+}
+EXPORT_SYMBOL(ve_arp_init);
+
+void ve_arp_fini(struct ve_struct *ve)
+{
+	if (ve->ve_arp_tbl) {
+#ifdef CONFIG_SYSCTL
+		neigh_sysctl_unregister(&ve->ve_arp_tbl->parms);
+#endif
+		neigh_table_clear(ve->ve_arp_tbl);
+		kfree(ve->ve_arp_tbl);
+		ve->ve_arp_tbl = NULL;
+	}
+}
+EXPORT_SYMBOL(ve_arp_fini);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/devinet.c linux-2.6.9-ve023stab054/net/ipv4/devinet.c
--- linux-2.6.9-100.orig/net/ipv4/devinet.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/devinet.c	2011-06-15 19:26:21.000000000 +0400
@@ -69,7 +69,7 @@ struct ipv4_devconf ipv4_devconf = {
 	.shared_media =	  1,
 };
 
-static struct ipv4_devconf ipv4_devconf_dflt = {
+struct ipv4_devconf ipv4_devconf_dflt = {
 	.accept_redirects =  1,
 	.send_redirects =    1,
 	.secure_redirects =  1,
@@ -77,10 +77,16 @@ static struct ipv4_devconf ipv4_devconf_
 	.accept_source_route = 1,
 };
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ve_ipv4_devconf_dflt	(*(get_exec_env()->_ipv4_devconf_dflt))
+#else
+#define ve_ipv4_devconf_dflt	ipv4_devconf_dflt
+#endif
+
 static void rtmsg_ifa(int event, struct in_ifaddr *);
 
 static struct notifier_block *inetaddr_chain;
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy);
 #ifdef CONFIG_SYSCTL
 static void devinet_sysctl_register(struct in_device *in_dev,
@@ -90,9 +96,9 @@ static void devinet_sysctl_unregister(st
 
 /* Locks all the inet devices. */
 
-static struct in_ifaddr *inet_alloc_ifa(void)
+struct in_ifaddr *inet_alloc_ifa(void)
 {
-	struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
+	struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL_UBC);
 
 	if (ifa) {
 		memset(ifa, 0, sizeof(*ifa));
@@ -101,6 +107,7 @@ static struct in_ifaddr *inet_alloc_ifa(
 
 	return ifa;
 }
+EXPORT_SYMBOL(inet_alloc_ifa);
 
 static void inet_rcu_free_ifa(struct rcu_head *head)
 {
@@ -144,7 +151,7 @@ struct in_device *inetdev_init(struct ne
 		goto out;
 	memset(in_dev, 0, sizeof(*in_dev));
 	INIT_RCU_HEAD(&in_dev->rcu_head);
-	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
+	memcpy(&in_dev->cnf, &ve_ipv4_devconf_dflt, sizeof(in_dev->cnf));
 	in_dev->cnf.sysctl = NULL;
 	in_dev->dev = dev;
 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
@@ -175,6 +182,7 @@ out_kfree:
 	in_dev = NULL;
 	goto out;
 }
+EXPORT_SYMBOL(inetdev_init);
 
 static void in_dev_rcu_put(struct rcu_head *head)
 {
@@ -229,7 +237,7 @@ int inet_addr_onlink(struct in_device *i
 	return 0;
 }
 
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy)
 {
 	struct in_ifaddr *ifa1 = *ifap;
@@ -282,7 +290,7 @@ static void inet_del_ifa(struct in_devic
 	}
 }
 
-static int inet_insert_ifa(struct in_ifaddr *ifa)
+int inet_insert_ifa(struct in_ifaddr *ifa)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -332,6 +340,7 @@ static int inet_insert_ifa(struct in_ifa
 
 	return 0;
 }
+EXPORT_SYMBOL(inet_insert_ifa);
 
 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 {
@@ -540,7 +549,7 @@ int devinet_ioctl(unsigned int cmd, void
 
 	case SIOCSIFFLAGS:
 		ret = -EACCES;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_VE_NET_ADMIN))
 			goto out;
 		break;
 	case SIOCSIFADDR:	/* Set interface address (and family) */
@@ -548,7 +557,7 @@ int devinet_ioctl(unsigned int cmd, void
 	case SIOCSIFDSTADDR:	/* Set the destination address */
 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
 		ret = -EACCES;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_VE_NET_ADMIN))
 			goto out;
 		ret = -EINVAL;
 		if (sin->sin_family != AF_INET)
@@ -956,7 +965,7 @@ static int inetdev_event(struct notifier
 	case NETDEV_UP:
 		if (dev->mtu < 68)
 			break;
-		if (dev == &loopback_dev) {
+		if (dev == &visible_loopback_dev) {
 			struct in_ifaddr *ifa;
 			if ((ifa = inet_alloc_ifa()) != NULL) {
 				ifa->ifa_local =
@@ -1119,10 +1128,10 @@ static struct rtnetlink_link inet_rtnetl
 void inet_forward_change(void)
 {
 	struct net_device *dev;
-	int on = ipv4_devconf.forwarding;
+	int on = ve_ipv4_devconf.forwarding;
 
-	ipv4_devconf.accept_redirects = !on;
-	ipv4_devconf_dflt.forwarding = on;
+	ve_ipv4_devconf.accept_redirects = !on;
+	ve_ipv4_devconf_dflt.forwarding = on;
 
 	read_lock(&dev_base_lock);
 	for (dev = dev_base; dev; dev = dev->next) {
@@ -1147,9 +1156,9 @@ static int devinet_sysctl_forward(ctl_ta
 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
 	if (write && *valp != val) {
-		if (valp == &ipv4_devconf.forwarding)
+		if (valp == &ve_ipv4_devconf.forwarding)
 			inet_forward_change();
-		else if (valp != &ipv4_devconf_dflt.forwarding)
+		else if (valp != &ve_ipv4_devconf_dflt.forwarding)
 			rt_cache_flush(0);
 	}
 
@@ -1411,28 +1420,21 @@ static struct devinet_sysctl_table {
 	},
 };
 
-static void devinet_sysctl_register(struct in_device *in_dev,
-				    struct ipv4_devconf *p)
+static struct devinet_sysctl_table *__devinet_sysctl_register(char *dev_name,
+		int ifindex, struct ipv4_devconf *p)
 {
 	int i;
-	struct net_device *dev = in_dev ? in_dev->dev : NULL;
-	struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
-	char *dev_name = NULL;
+	struct devinet_sysctl_table *t;
 
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (!t)
-		return;
+		goto out;
+
 	memcpy(t, &devinet_sysctl, sizeof(*t));
 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
 		t->devinet_vars[i].de = NULL;
-	}
-
-	if (dev) {
-		dev_name = dev->name; 
-		t->devinet_dev[0].ctl_name = dev->ifindex;
-	} else {
-		dev_name = "default";
-		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+		t->devinet_vars[i].owner_env = get_exec_env();
 	}
 
 	/* 
@@ -1442,8 +1444,9 @@ static void devinet_sysctl_register(stru
 	 */	
 	dev_name = net_sysctl_strdup(dev_name);
 	if (!dev_name)
-	    goto free;
+	    goto out_free_table;
 
+	t->devinet_dev[0].ctl_name    = ifindex;
 	t->devinet_dev[0].procname    = dev_name;
 	t->devinet_dev[0].child	      = t->devinet_vars;
 	t->devinet_dev[0].de	      = NULL;
@@ -1456,17 +1459,38 @@ static void devinet_sysctl_register(stru
 
 	t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
 	if (!t->sysctl_header)
-	    goto free_procname;
+	    goto out_free_procname;
 
-	p->sysctl = t;
-	return;
+	return t;
 
 	/* error path */
- free_procname:
+out_free_procname:
 	kfree(dev_name);
- free:
+out_free_table:
 	kfree(t);
-	return;
+out:
+	printk(KERN_DEBUG "Can't register net/ipv4/conf sysctls.\n");
+	return NULL;
+}
+
+static void devinet_sysctl_register(struct in_device *in_dev,
+				    struct ipv4_devconf *p)
+{
+	struct net_device *dev;
+	char *dev_name;
+	int ifindex;
+
+	dev = in_dev ? in_dev->dev : NULL;
+
+	if (dev) {
+		dev_name = dev->name;
+		ifindex = dev->ifindex;
+	} else {
+		dev_name = "default";
+		ifindex = NET_PROTO_CONF_DEFAULT;
+	}
+
+	p->sysctl = __devinet_sysctl_register(dev_name, ifindex, p);
 }
 
 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
@@ -1479,7 +1503,189 @@ static void devinet_sysctl_unregister(st
 		kfree(t);
 	}
 }
+
+extern int visible_ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+			void __user *buffer, size_t *lenp, loff_t *ppos);
+extern int visible_ipv4_sysctl_forward_strategy(ctl_table *table, int *name, int nlen,
+			 void *oldval, size_t *oldlenp,
+			 void *newval, size_t newlen, 
+			 void **context);
+
+extern void *get_flush_delay_addr(void);
+extern int visible_ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
+			      void __user *buffer, size_t *lenp, loff_t *ppos);
+extern int visible_ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
+						int __user *name,
+						int nlen,
+						void __user *oldval,
+						size_t __user *oldlenp,
+						void __user *newval,
+						size_t newlen,
+						void **context);
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+static ctl_table net_sysctl_tables[] = {
+	/* 0: net */
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= &net_sysctl_tables[2],
+	},
+	{ .ctl_name = 0, },
+	/* 2: net/ipv4 */
+	{
+		.ctl_name	= NET_IPV4,
+		.procname	= "ipv4",
+		.mode		= 0555,
+		.child		= &net_sysctl_tables[4],
+	},
+	{ .ctl_name = 0, },
+	/* 4, 5: net/ipv4/[vars] */
+	{
+		.ctl_name	= NET_IPV4_FORWARD,
+		.procname	= "ip_forward",
+		.data		= &ipv4_devconf.forwarding,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &visible_ipv4_sysctl_forward,
+		.strategy	= &visible_ipv4_sysctl_forward_strategy,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE,
+		.procname	= "route",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= &net_sysctl_tables[7],
+	},
+	{ .ctl_name = 0 },
+	/* 7: net/ipv4/route/flush */
+	{
+		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
+		.procname	= "flush",
+		.data		= NULL, /* set up below */
+		.maxlen		= sizeof(int),
+		.mode		= 0200,
+		.proc_handler	= &visible_ipv4_sysctl_rtcache_flush,
+		.strategy	= &visible_ipv4_sysctl_rtcache_flush_strategy,
+	},
+	{ .ctl_name = 0 },
+};
+
+static int ip_forward_sysctl_register(struct ve_struct *ve,
+		struct ipv4_devconf *p)
+{
+	struct ctl_table_header *hdr;
+	ctl_table *root;
+
+	root = clone_sysctl_template(net_sysctl_tables,
+			sizeof(net_sysctl_tables) / sizeof(ctl_table));
+	if (root == NULL)
+		goto out;
+
+	root[4].data = &p->forwarding;
+	root[7].data = get_flush_delay_addr();
+
+	hdr = register_sysctl_table(root, 1);
+	if (hdr == NULL)
+		goto out_free;
+
+	ve->forward_header = hdr;
+	ve->forward_table = root;
+	return 0;
+
+out_free:
+	free_sysctl_clone(root);
+out:
+	return -ENOMEM;
+}
+
+static inline void ip_forward_sysctl_unregister(struct ve_struct *ve)
+{
+	unregister_sysctl_table(ve->forward_header);
+	ve->forward_header = NULL;
+}
+
+static inline void ip_forward_sysctl_free(struct ve_struct *ve)
+{
+	free_sysctl_clone(ve->forward_table);
+	ve->forward_table = NULL;
+}
+#endif
+#endif
+
+int devinet_sysctl_init(struct ve_struct *ve)
+{
+	int err = 0;
+#ifdef CONFIG_SYSCTL
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	struct ipv4_devconf *conf, *conf_def;
+
+	err = -ENOMEM;
+
+	conf = kmalloc(sizeof(*conf), GFP_KERNEL);
+	if (!conf)
+		goto err1;
+
+	memcpy(conf, &ipv4_devconf, sizeof(*conf));
+	conf->sysctl = __devinet_sysctl_register("all",
+			NET_PROTO_CONF_ALL, conf);
+	if (!conf->sysctl)
+		goto err2;
+
+	conf_def = kmalloc(sizeof(*conf_def), GFP_KERNEL);
+	if (!conf_def)
+		goto err3;
+
+	memcpy(conf_def, &ipv4_devconf_dflt, sizeof(*conf_def));
+	conf_def->sysctl = __devinet_sysctl_register("default",
+			NET_PROTO_CONF_DEFAULT, conf_def);
+	if (!conf_def->sysctl)
+		goto err4;
+
+	err = ip_forward_sysctl_register(ve, conf);
+	if (err)
+		goto err5;
+
+	ve->_ipv4_devconf = conf;
+	ve->_ipv4_devconf_dflt = conf_def;
+	return 0;
+
+err5:
+	devinet_sysctl_unregister(conf_def);
+err4:
+	kfree(conf_def);
+err3:
+	devinet_sysctl_unregister(conf);
+err2:
+	kfree(conf);
+err1:
+#endif
+#endif
+	return err;
+}
+
+void devinet_sysctl_fini(struct ve_struct *ve)
+{
+#ifdef CONFIG_SYSCTL
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	ip_forward_sysctl_unregister(ve);
+	devinet_sysctl_unregister(ve->_ipv4_devconf);
+	devinet_sysctl_unregister(ve->_ipv4_devconf_dflt);
+#endif
 #endif
+}
+
+void devinet_sysctl_free(struct ve_struct *ve)
+{
+#ifdef CONFIG_SYSCTL
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	ip_forward_sysctl_free(ve);
+	kfree(ve->_ipv4_devconf);
+	kfree(ve->_ipv4_devconf_dflt);
+#endif
+#endif
+}
 
 void __init devinet_init(void)
 {
@@ -1489,13 +1695,18 @@ void __init devinet_init(void)
 #ifdef CONFIG_SYSCTL
 	devinet_sysctl.sysctl_header =
 		register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
-	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
+	__devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
+			&ipv4_devconf_dflt);
 #endif
 }
 
 EXPORT_SYMBOL(devinet_ioctl);
 EXPORT_SYMBOL(in_dev_finish_destroy);
 EXPORT_SYMBOL(inet_select_addr);
+EXPORT_SYMBOL(inet_del_ifa);
 EXPORT_SYMBOL(inetdev_by_index);
+EXPORT_SYMBOL(devinet_sysctl_init);
+EXPORT_SYMBOL(devinet_sysctl_fini);
+EXPORT_SYMBOL(devinet_sysctl_free);
 EXPORT_SYMBOL(register_inetaddr_notifier);
 EXPORT_SYMBOL(unregister_inetaddr_notifier);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/fib_frontend.c linux-2.6.9-ve023stab054/net/ipv4/fib_frontend.c
--- linux-2.6.9-100.orig/net/ipv4/fib_frontend.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/fib_frontend.c	2011-06-15 19:26:19.000000000 +0400
@@ -51,14 +51,46 @@
 
 #define RT_TABLE_MIN RT_TABLE_MAIN
 
+#undef ip_fib_local_table
+#undef ip_fib_main_table
 struct fib_table *ip_fib_local_table;
 struct fib_table *ip_fib_main_table;
+void prepare_fib_tables(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->_local_table = ip_fib_local_table;
+	ip_fib_local_table = (struct fib_table *)0x12345678;
+	get_ve0()->_main_table = ip_fib_main_table;
+	ip_fib_main_table = (struct fib_table *)0x12345678;
+#endif
+}
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ip_fib_local_table 	get_exec_env()->_local_table
+#define ip_fib_main_table 	get_exec_env()->_main_table
+#endif
 
 #else
 
 #define RT_TABLE_MIN 1
 
+#undef fib_tables
 struct fib_table *fib_tables[RT_TABLE_MAX+1];
+void prepare_fib_tables(void)
+{
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	int i;
+
+	BUG_ON(sizeof(fib_tables) !=
+		sizeof(((struct ve_struct *)0)->_fib_tables));
+	memcpy(get_ve0()->_fib_tables, fib_tables, sizeof(fib_tables));
+	for (i = 0; i <= RT_TABLE_MAX; i++)
+		fib_tables[i] = (void *)0x12366678;
+#endif
+}
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define fib_tables get_exec_env()->_fib_tables
+#endif
 
 struct fib_table *__fib_new_table(int id)
 {
@@ -248,7 +280,7 @@ int ip_rt_ioctl(unsigned int cmd, void _
 	switch (cmd) {
 	case SIOCADDRT:		/* Add a route */
 	case SIOCDELRT:		/* Delete a route */
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_VE_NET_ADMIN))
 			return -EPERM;
 		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
 			return -EFAULT;
@@ -598,6 +630,7 @@ struct notifier_block fib_netdev_notifie
 
 void __init ip_fib_init(void)
 {
+	prepare_fib_tables();
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/fib_hash.c linux-2.6.9-ve023stab054/net/ipv4/fib_hash.c
--- linux-2.6.9-100.orig/net/ipv4/fib_hash.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/fib_hash.c	2011-06-15 19:26:20.000000000 +0400
@@ -35,6 +35,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/init.h>
+#include <linux/ve.h>
 
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -72,11 +73,6 @@ struct fn_zone {
  * can be cheaper than memory lookup, so that FZ_* macros are used.
  */
 
-struct fn_hash {
-	struct fn_zone	*fn_zones[33];
-	struct fn_zone	*fn_zone_list;
-};
-
 static inline u32 fn_hash(u32 key, struct fn_zone *fz)
 {
 	u32 h = ntohl(key)>>(32 - fz->fz_order);
@@ -655,7 +651,7 @@ fn_hash_delete(struct fib_table *tb, str
 	return -ESRCH;
 }
 
-static int fn_flush_list(struct fn_zone *fz, int idx)
+static int fn_flush_list(struct fn_zone *fz, int idx, int destroy)
 {
 	struct hlist_head *head = &fz->fz_hash[idx];
 	struct hlist_node *node, *n;
@@ -670,7 +666,9 @@ static int fn_flush_list(struct fn_zone 
 		list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
 			struct fib_info *fi = fa->fa_info;
 
-			if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
+			if (fi == NULL)
+				continue;
+			if (destroy || (fi->fib_flags&RTNH_F_DEAD)) {
 				write_lock_bh(&fib_hash_lock);
 				list_del(&fa->fa_list);
 				if (list_empty(&f->fn_alias)) {
@@ -691,7 +689,7 @@ static int fn_flush_list(struct fn_zone 
 	return found;
 }
 
-static int fn_hash_flush(struct fib_table *tb)
+static int __fn_hash_flush(struct fib_table *tb, int destroy)
 {
 	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
 	struct fn_zone *fz;
@@ -701,11 +699,99 @@ static int fn_hash_flush(struct fib_tabl
 		int i;
 
 		for (i = fz->fz_divisor - 1; i >= 0; i--)
-			found += fn_flush_list(fz, i);
+			found += fn_flush_list(fz, i, destroy);
 	}
 	return found;
 }
 
+static int fn_hash_flush(struct fib_table *tb)
+{
+	return __fn_hash_flush(tb, 0);
+}
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+static void fn_free_zones(struct fib_table *tb)
+{
+	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
+	struct fn_zone *fz;
+
+	while ((fz = table->fn_zone_list) != NULL) {
+		table->fn_zone_list = fz->fz_next;
+		fz_hash_free(fz->fz_hash, fz->fz_divisor);
+		kfree(fz);
+	}
+}
+
+void fib_hash_destroy(struct fib_table *tb)
+{
+	__fn_hash_flush(tb, 1);
+	fn_free_zones(tb);
+	kfree(tb);
+}
+
+/*
+ * Initialization of virtualized networking subsystem.
+ */
+int init_ve_route(struct ve_struct *ve)
+{
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	if (fib_rules_create())
+		return -ENOMEM;
+	ve->_fib_tables[RT_TABLE_LOCAL] = fib_hash_init(RT_TABLE_LOCAL);
+	if (!ve->_fib_tables[RT_TABLE_LOCAL])
+		goto out_destroy;
+	ve->_fib_tables[RT_TABLE_MAIN] = fib_hash_init(RT_TABLE_MAIN);
+	if (!ve->_fib_tables[RT_TABLE_MAIN])
+		goto out_destroy_local;
+
+	return 0;
+
+out_destroy_local:
+	fib_hash_destroy(ve->_fib_tables[RT_TABLE_LOCAL]);
+out_destroy:
+	fib_rules_destroy();
+	ve->_local_rule = NULL;
+	return -ENOMEM;
+#else
+	ve->_local_table = fib_hash_init(RT_TABLE_LOCAL);
+	if (!ve->_local_table)
+		return -ENOMEM;
+	ve->_main_table = fib_hash_init(RT_TABLE_MAIN);
+	if (!ve->_main_table) {
+		fib_hash_destroy(ve->_local_table);
+		return -ENOMEM;
+	}
+	return 0;
+#endif
+}
+
+void fini_ve_route(struct ve_struct *ve)
+{
+	unsigned int bytes;
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	int i;
+	for (i=0; i<RT_TABLE_MAX+1; i++)
+	{
+		if (!ve->_fib_tables[i])
+			continue;
+		fib_hash_destroy(ve->_fib_tables[i]);
+	}
+	fib_rules_destroy();
+	ve->_local_rule = NULL;
+#else
+	fib_hash_destroy(ve->_local_table);
+	fib_hash_destroy(ve->_main_table);
+#endif
+	bytes = ve->_fib_hash_size * sizeof(struct hlist_head *);
+	fib_hash_free(ve->_fib_info_hash, bytes);
+	fib_hash_free(ve->_fib_info_laddrhash, bytes);
+	ve->_fib_info_hash = ve->_fib_info_laddrhash = NULL;
+}
+
+EXPORT_SYMBOL(init_ve_route);
+EXPORT_SYMBOL(fini_ve_route);
+#endif
+
 
 static inline int
 fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
@@ -823,7 +909,7 @@ static void rtmsg_fib(int event, struct 
 		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
 }
 
-#ifdef CONFIG_IP_MULTIPLE_TABLES
+#if defined(CONFIG_IP_MULTIPLE_TABLES) || defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
 struct fib_table * fib_hash_init(int id)
 #else
 struct fib_table * __init fib_hash_init(int id)
@@ -833,14 +919,14 @@ struct fib_table * __init fib_hash_init(
 
 	if (fn_hash_kmem == NULL)
 		fn_hash_kmem = kmem_cache_create("ip_fib_hash",
-						 sizeof(struct fib_node),
-						 0, SLAB_HWCACHE_ALIGN,
+						 sizeof(struct fib_node), 0,
+						 SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						 NULL, NULL);
 
 	if (fn_alias_kmem == NULL)
 		fn_alias_kmem = kmem_cache_create("ip_fib_alias",
-						  sizeof(struct fib_alias),
-						  0, SLAB_HWCACHE_ALIGN,
+						  sizeof(struct fib_alias), 0,
+						  SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						  NULL, NULL);
 
 	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
@@ -1126,6 +1212,6 @@ int __init fib_proc_init(void)
 
 void __init fib_proc_exit(void)
 {
-	proc_net_remove("route");
+	remove_proc_glob_entry("net/route", NULL);
 }
 #endif /* CONFIG_PROC_FS */
diff -Nurap linux-2.6.9-100.orig/net/ipv4/fib_lookup.h linux-2.6.9-ve023stab054/net/ipv4/fib_lookup.h
--- linux-2.6.9-100.orig/net/ipv4/fib_lookup.h	2004-10-19 01:53:50.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/fib_lookup.h	2011-06-15 19:26:19.000000000 +0400
@@ -30,5 +30,6 @@ extern int fib_nh_match(struct rtmsg *r,
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			 u8 tb_id, u8 type, u8 scope, void *dst,
 			 int dst_len, u8 tos, struct fib_info *fi);
+void fib_hash_free(struct hlist_head *hash, int bytes);
 
 #endif /* _FIB_LOOKUP_H */
diff -Nurap linux-2.6.9-100.orig/net/ipv4/fib_rules.c linux-2.6.9-ve023stab054/net/ipv4/fib_rules.c
--- linux-2.6.9-100.orig/net/ipv4/fib_rules.c	2004-10-19 01:54:32.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/fib_rules.c	2011-06-15 19:26:20.000000000 +0400
@@ -38,6 +38,7 @@
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/rtnetlink.h>
 #include <linux/init.h>
 
 #include <net/ip.h>
@@ -98,9 +99,93 @@ static struct fib_rule local_rule = {
 	.r_action =	RTN_UNICAST,
 };
 
-static struct fib_rule *fib_rules = &local_rule;
 static rwlock_t fib_rules_lock = RW_LOCK_UNLOCKED;
 
+void __init prepare_fib_rules(void)
+{
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	get_ve0()->_local_rule = &local_rule;
+	get_ve0()->_fib_rules = &local_rule;
+#endif
+}
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define local_rule (*(get_exec_env()->_local_rule))
+#define fib_rules (get_exec_env()->_fib_rules)
+#else
+static struct fib_rule *fib_rules = &local_rule;
+#endif
+
+#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE)
+int fib_rules_create()
+{
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	struct fib_rule *default_rule, *main_rule, *loc_rule;
+
+	default_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL_UBC);
+	if (default_rule == NULL)
+		goto out_def;
+	memset(default_rule, 0, sizeof(struct fib_rule));
+	atomic_set(&default_rule->r_clntref, 1);
+	default_rule->r_preference = 0x7FFF;
+	default_rule->r_table = RT_TABLE_DEFAULT;
+	default_rule->r_action = RTN_UNICAST;
+
+	main_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL_UBC);
+	if (main_rule == NULL)
+		goto out_main;
+	memset(main_rule, 0, sizeof(struct fib_rule));
+	atomic_set(&main_rule->r_clntref, 1);
+	main_rule->r_preference = 0x7FFE;
+	main_rule->r_table = RT_TABLE_MAIN;
+	main_rule->r_action = RTN_UNICAST;
+	main_rule->r_next = default_rule;
+
+	loc_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL_UBC);
+	if (loc_rule == NULL)
+		goto out_loc;
+	memset(loc_rule, 0, sizeof(struct fib_rule));
+	atomic_set(&loc_rule->r_clntref, 1);
+	loc_rule->r_preference = 0;
+	loc_rule->r_table = RT_TABLE_LOCAL;
+	loc_rule->r_action = RTN_UNICAST;
+	loc_rule->r_next = main_rule;
+
+	get_exec_env()->_local_rule = loc_rule;
+	get_exec_env()->_fib_rules = loc_rule;
+
+	return 0;
+
+out_loc:
+	kfree(main_rule);
+out_main:
+	kfree(default_rule);
+out_def:
+	return -1;
+#else
+	return 0;
+#endif
+}
+
+void fib_rules_destroy()
+{
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	struct fib_rule *r;
+
+	rtnl_lock();
+	write_lock_bh(&fib_rules_lock);
+	while(fib_rules != NULL) {
+		r = fib_rules;
+		fib_rules = fib_rules->r_next;
+		r->r_dead = 1;
+		fib_rule_put(r);
+	}
+	write_unlock_bh(&fib_rules_lock);
+	rtnl_unlock();
+#endif
+}
+#endif
+
 int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct rtattr **rta = arg;
@@ -183,7 +268,7 @@ int inet_rtm_newrule(struct sk_buff *skb
 		}
 	}
 
-	new_r = kmalloc(sizeof(*new_r), GFP_KERNEL);
+	new_r = kmalloc(sizeof(*new_r), GFP_KERNEL_UBC);
 	if (!new_r)
 		return -ENOMEM;
 	memset(new_r, 0, sizeof(*new_r));
@@ -440,5 +525,6 @@ int inet_dump_rules(struct sk_buff *skb,
 
 void __init fib_rules_init(void)
 {
+	prepare_fib_rules();
 	register_netdevice_notifier(&fib_rules_notifier);
 }
diff -Nurap linux-2.6.9-100.orig/net/ipv4/fib_semantics.c linux-2.6.9-ve023stab054/net/ipv4/fib_semantics.c
--- linux-2.6.9-100.orig/net/ipv4/fib_semantics.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/fib_semantics.c	2011-06-15 19:26:19.000000000 +0400
@@ -32,6 +32,7 @@
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
+#include <linux/ve.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/init.h>
@@ -48,10 +49,18 @@
 #define FSprintk(a...)
 
 static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define fib_info_hash (get_exec_env()->_fib_info_hash)
+#define fib_info_laddrhash (get_exec_env()->_fib_info_laddrhash)
+#define fib_hash_size (get_exec_env()->_fib_hash_size)
+#define fib_info_cnt (get_exec_env()->_fib_info_cnt)
+#else
 static struct hlist_head *fib_info_hash;
 static struct hlist_head *fib_info_laddrhash;
 static unsigned int fib_hash_size;
 static unsigned int fib_info_cnt;
+#endif
+
 
 #define DEVINDEX_HASHBITS 8
 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
@@ -232,13 +241,15 @@ static struct fib_info *fib_find_info(co
 	return NULL;
 }
 
-static inline unsigned int fib_devindex_hashfn(unsigned int val)
+static inline unsigned int fib_devindex_hashfn(unsigned int val,
+		envid_t veid)
 {
 	unsigned int mask = DEVINDEX_HASHSIZE - 1;
 
 	return (val ^
 		(val >> DEVINDEX_HASHBITS) ^
-		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
+		(val >> (DEVINDEX_HASHBITS * 2)) ^
+		(veid ^ (veid >> 16))) & mask;
 }
 
 /* Check, that the gateway is already configured.
@@ -254,7 +265,7 @@ int ip_fib_check_default(u32 gw, struct 
 
 	read_lock(&fib_info_lock);
 
-	hash = fib_devindex_hashfn(dev->ifindex);
+	hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env));
 	head = &fib_info_devhash[hash];
 	hlist_for_each_entry(nh, node, head, nh_hash) {
 		if (nh->nh_dev == dev &&
@@ -509,7 +520,7 @@ static struct hlist_head *fib_hash_alloc
 			__get_free_pages(GFP_KERNEL, get_order(bytes));
 }
 
-static void fib_hash_free(struct hlist_head *hash, int bytes)
+void fib_hash_free(struct hlist_head *hash, int bytes)
 {
 	if (!hash)
 		return;
@@ -524,10 +535,13 @@ static void fib_hash_move(struct hlist_h
 			  struct hlist_head *new_laddrhash,
 			  unsigned int new_size)
 {
+	struct hlist_head *old_info_hash, *old_laddrhash;
 	unsigned int old_size = fib_hash_size;
-	unsigned int i;
+	unsigned int i, bytes;
 
 	write_lock(&fib_info_lock);
+	old_info_hash = fib_info_hash;
+	old_laddrhash = fib_info_laddrhash;
 	fib_hash_size = new_size;
 
 	for (i = 0; i < old_size; i++) {
@@ -567,6 +581,10 @@ static void fib_hash_move(struct hlist_h
 	fib_info_laddrhash = new_laddrhash;
 
 	write_unlock(&fib_info_lock);
+
+	bytes = old_size * sizeof(struct hlist_head *);
+	fib_hash_free(old_info_hash, bytes);
+	fib_hash_free(old_laddrhash, bytes);
 }
 
 struct fib_info *
@@ -743,7 +761,8 @@ link_it:
 
 		if (!nh->nh_dev)
 			continue;
-		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
+		hash = fib_devindex_hashfn(nh->nh_dev->ifindex,
+				VEID(nh->nh_dev->owner_env));
 		head = &fib_info_devhash[hash];
 		hlist_add_head(&nh->nh_hash, head);
 	} endfor_nexthops(fi)
@@ -1083,7 +1102,8 @@ int fib_sync_down(u32 local, struct net_
 
 	if (dev) {
 		struct fib_info *prev_fi = NULL;
-		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+		unsigned int hash = fib_devindex_hashfn(dev->ifindex,
+				VEID(dev->owner_env));
 		struct hlist_head *head = &fib_info_devhash[hash];
 		struct hlist_node *node;
 		struct fib_nh *nh;
@@ -1148,7 +1168,7 @@ int fib_sync_up(struct net_device *dev)
 		return 0;
 
 	prev_fi = NULL;
-	hash = fib_devindex_hashfn(dev->ifindex);
+	hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env));
 	head = &fib_info_devhash[hash];
 	ret = 0;
 
diff -Nurap linux-2.6.9-100.orig/net/ipv4/igmp.c linux-2.6.9-ve023stab054/net/ipv4/igmp.c
--- linux-2.6.9-100.orig/net/ipv4/igmp.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/igmp.c	2011-06-15 19:26:19.000000000 +0400
@@ -680,15 +680,18 @@ static int igmp_send_report(struct in_de
 static void igmp_gq_timer_expire(unsigned long data)
 {
 	struct in_device *in_dev = (struct in_device *)data;
+	struct ve_struct *old_env = set_exec_env(in_dev->dev->owner_env);
 
 	in_dev->mr_gq_running = 0;
 	igmpv3_send_report(in_dev, NULL);
 	__in_dev_put(in_dev);
+	set_exec_env(old_env);
 }
 
 static void igmp_ifc_timer_expire(unsigned long data)
 {
 	struct in_device *in_dev = (struct in_device *)data;
+	struct ve_struct *old_env = set_exec_env(in_dev->dev->owner_env);
 
 	igmpv3_send_cr(in_dev);
 	if (in_dev->mr_ifc_count) {
@@ -696,6 +699,7 @@ static void igmp_ifc_timer_expire(unsign
 		igmp_ifc_start_timer(in_dev, IGMP_Unsolicited_Report_Interval);
 	}
 	__in_dev_put(in_dev);
+	set_exec_env(old_env);
 }
 
 static void igmp_ifc_event(struct in_device *in_dev)
@@ -712,6 +716,7 @@ static void igmp_timer_expire(unsigned l
 {
 	struct ip_mc_list *im=(struct ip_mc_list *)data;
 	struct in_device *in_dev = im->interface;
+	struct ve_struct *old_env = set_exec_env(in_dev->dev->owner_env);
 
 	spin_lock(&im->lock);
 	im->tm_running=0;
@@ -731,6 +736,7 @@ static void igmp_timer_expire(unsigned l
 		igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
 
 	ip_ma_put(im);
+	set_exec_env(old_env);
 }
 
 static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
@@ -2205,6 +2211,8 @@ static inline struct ip_mc_list *igmp_mc
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct in_device *in_dev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		in_dev = in_dev_get(state->dev);
 		if (!in_dev)
 			continue;
@@ -2234,6 +2242,8 @@ static struct ip_mc_list *igmp_mc_get_ne
 			state->in_dev = NULL;
 			break;
 		}
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		state->in_dev = in_dev_get(state->dev);
 		if (!state->in_dev)
 			continue;
@@ -2364,10 +2374,13 @@ static inline struct ip_sf_list *igmp_mc
 	struct ip_mc_list *im = NULL;
 	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
 
-	for (state->dev = dev_base, state->idev = NULL, state->im = NULL;
+	for (state->dev = dev_base,
+					state->idev = NULL, state->im = NULL;
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct in_device *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in_dev_get(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
@@ -2407,6 +2420,8 @@ static struct ip_sf_list *igmp_mcf_get_n
 				state->idev = NULL;
 				goto out;
 			}
+			if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+				continue;
 			state->idev = in_dev_get(state->dev);
 			if (!state->idev)
 				continue;
diff -Nurap linux-2.6.9-100.orig/net/ipv4/ip_forward.c linux-2.6.9-ve023stab054/net/ipv4/ip_forward.c
--- linux-2.6.9-100.orig/net/ipv4/ip_forward.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/ip_forward.c	2011-06-15 19:26:19.000000000 +0400
@@ -91,6 +91,23 @@ int ip_forward(struct sk_buff *skb)
 	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto sr_failed;
 
+	/*
+	 * We try to optimize forwarding of VE packets:
+	 * do not decrement TTL (and so save skb_cow)
+	 * during forwarding of outgoing pkts from VE.
+	 * For incoming pkts we still do ttl decr,
+	 * since such skb is not cloned and does not require
+	 * actual cow. So, there is at least one place
+	 * in pkts path with mandatory ttl decr, that is
+	 * sufficient to prevent routing loops.
+	 */
+	if (
+#ifdef CONFIG_IP_ROUTE_NAT			
+	    (rt->rt_flags & RTCF_NAT) == 0 &&	  /* no NAT mangling expected */
+#endif						  /* and */
+	    (skb->dev->features & NETIF_F_VENET)) /* src is VENET device */
+		goto no_ttl_decr;
+
 	/* We are about to mangle packet. Copy it! */
 	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
 		goto drop;
@@ -99,6 +116,8 @@ int ip_forward(struct sk_buff *skb)
 	/* Decrease ttl after skb cow done */
 	ip_decrease_ttl(iph);
 
+no_ttl_decr:
+
 	/*
 	 *	We now generate an ICMP HOST REDIRECT giving the route
 	 *	we calculated.
diff -Nurap linux-2.6.9-100.orig/net/ipv4/ip_fragment.c linux-2.6.9-ve023stab054/net/ipv4/ip_fragment.c
--- linux-2.6.9-100.orig/net/ipv4/ip_fragment.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/ip_fragment.c	2011-06-15 19:26:19.000000000 +0400
@@ -42,6 +42,7 @@
 #include <linux/udp.h>
 #include <linux/inet.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/ve_owner.h>
 
 /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
  * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -92,8 +93,12 @@ struct ipq {
 	struct ipq	**pprev;
 	int		iif;
 	struct timeval	stamp;
+	struct ve_struct *owner_env;
 };
 
+DCL_VE_OWNER_PROTO(IPQ, TAIL_SOFT, struct ipq, owner_env, inline, (always_inline))
+DCL_VE_OWNER(IPQ, TAIL_SOFT, struct ipq, owner_env, inline, (always_inline))
+
 /* Hash table. */
 
 #define IPQ_HASHSZ	64
@@ -105,6 +110,20 @@ static u32 ipfrag_hash_rnd;
 static LIST_HEAD(ipq_lru_list);
 int ip_frag_nqueues = 0;
 
+void prepare_ipq(void)
+{
+	struct ipq *qp;
+	unsigned int hash;
+
+	write_lock(&ipfrag_lock);
+	for (hash = 0; hash < IPQ_HASHSZ; hash++) {
+		for(qp = ipq_hash[hash]; qp; qp = qp->next) {
+			SET_VE_OWNER_IPQ(qp, get_ve0());
+		}
+	}
+	write_unlock(&ipfrag_lock);
+}
+
 static __inline__ void __ipq_unlink(struct ipq *qp)
 {
 	if(qp->next)
@@ -188,7 +207,8 @@ static __inline__ void frag_free_queue(s
 
 static __inline__ struct ipq *frag_alloc_queue(void)
 {
-	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
+	struct ipq *qp = kmalloc(sizeof(struct ipq) + sizeof(void *),
+				GFP_ATOMIC);
 
 	if(!qp)
 		return NULL;
@@ -281,6 +301,9 @@ static void ip_evictor(void)
 static void ip_expire(unsigned long arg)
 {
 	struct ipq *qp = (struct ipq *) arg;
+	struct ve_struct *envid;
+
+	envid = set_exec_env(VE_OWNER_IPQ(qp));
 
 	spin_lock(&qp->lock);
 
@@ -303,6 +326,8 @@ static void ip_expire(unsigned long arg)
 out:
 	spin_unlock(&qp->lock);
 	ipq_put(qp, NULL);
+
+	(void)set_exec_env(envid);
 }
 
 /* Creation primitives. */
@@ -322,7 +347,8 @@ static struct ipq *ip_frag_intern(unsign
 		   qp->saddr == qp_in->saddr	&&
 		   qp->daddr == qp_in->daddr	&&
 		   qp->protocol == qp_in->protocol &&
-		   qp->user == qp_in->user) {
+		   qp->user == qp_in->user	&&
+		   qp->owner_env == get_exec_env()) {
 			atomic_inc(&qp->refcnt);
 			write_unlock(&ipfrag_lock);
 			qp_in->last_in |= COMPLETE;
@@ -374,6 +400,8 @@ static struct ipq *ip_frag_create(unsign
 	qp->lock = SPIN_LOCK_UNLOCKED;
 	atomic_set(&qp->refcnt, 1);
 
+	SET_VE_OWNER_IPQ(qp, get_exec_env());
+
 	return ip_frag_intern(hash, qp);
 
 out_nomem:
@@ -399,7 +427,8 @@ static inline struct ipq *ip_find(struct
 		   qp->saddr == saddr	&&
 		   qp->daddr == daddr	&&
 		   qp->protocol == protocol &&
-		   qp->user == user) {
+		   qp->user == user	&&
+		   qp->owner_env == get_exec_env()) {
 			atomic_inc(&qp->refcnt);
 			read_unlock(&ipfrag_lock);
 			return qp;
@@ -667,6 +696,9 @@ struct sk_buff *ip_defrag(struct sk_buff
 		    qp->meat == qp->len)
 			ret = ip_frag_reasm(qp, dev);
 
+		if (ret)
+			SET_VE_OWNER_SKB(ret, VE_OWNER_SKB(skb));
+
 		spin_unlock(&qp->lock);
 		ipq_put(qp, NULL);
 		return ret;
@@ -700,6 +732,48 @@ void ip_defrag_user_id_free(int user)
 }
 EXPORT_SYMBOL(ip_defrag_user_id_free);
 
+#ifdef CONFIG_VE
+/* XXX */
+void ip_fragment_cleanup(struct ve_struct *envid)
+{
+	int i, progress;
+
+	/* All operations with fragment queues are performed from NET_RX/TX
+	 * soft interrupts or from timer context.  --Den */
+	local_bh_disable();
+	do {
+		progress = 0;
+		for (i = 0; i < IPQ_HASHSZ; i++) {
+			struct ipq *qp;
+			if (ipq_hash[i] == NULL)
+				continue;
+inner_restart:
+			read_lock(&ipfrag_lock);
+			for (qp = ipq_hash[i]; qp; qp = qp->next) {
+				if (!ve_accessible_strict(
+						VE_OWNER_IPQ(qp),
+						envid))
+					continue;
+				atomic_inc(&qp->refcnt);
+				read_unlock(&ipfrag_lock);
+
+				spin_lock(&qp->lock);
+				if (!(qp->last_in&COMPLETE))
+					ipq_kill(qp);
+				spin_unlock(&qp->lock);
+
+				ipq_put(qp, NULL);
+				progress = 1;
+				goto inner_restart;
+			}
+			read_unlock(&ipfrag_lock);
+		}
+	} while(progress);
+	local_bh_enable();
+}
+EXPORT_SYMBOL(ip_fragment_cleanup);
+#endif
+
 void ipfrag_init(void)
 {
 	ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
diff -Nurap linux-2.6.9-100.orig/net/ipv4/ip_output.c linux-2.6.9-ve023stab054/net/ipv4/ip_output.c
--- linux-2.6.9-100.orig/net/ipv4/ip_output.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/ip_output.c	2011-06-15 19:26:20.000000000 +0400
@@ -1272,13 +1272,14 @@ void ip_send_reply(struct sock *sk, stru
 		char			data[40];
 	} replyopts;
 	struct ipcm_cookie ipc;
-	u32 daddr;
+	u32 saddr, daddr;
 	struct rtable *rt = (struct rtable*)skb->dst;
 
 	if (ip_options_echo(&replyopts.opt, skb))
 		return;
 
-	daddr = ipc.addr = rt->rt_src;
+	saddr = skb->nh.iph->daddr;
+	daddr = ipc.addr = skb->nh.iph->saddr;
 	ipc.opt = NULL;
 
 	if (replyopts.opt.optlen) {
@@ -1291,7 +1292,7 @@ void ip_send_reply(struct sock *sk, stru
 	{
 		struct flowi fl = { .nl_u = { .ip4_u =
 					      { .daddr = daddr,
-						.saddr = rt->rt_spec_dst,
+						.saddr = saddr,
 						.tos = RT_TOS(skb->nh.iph->tos) } },
 				    /* Not quite clean, but right. */
 				    .uli_u = { .ports =
diff -Nurap linux-2.6.9-100.orig/net/ipv4/ipmr.c linux-2.6.9-ve023stab054/net/ipv4/ipmr.c
--- linux-2.6.9-100.orig/net/ipv4/ipmr.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/ipmr.c	2011-06-15 19:26:19.000000000 +0400
@@ -834,7 +834,7 @@ static void mrtsock_destruct(struct sock
 {
 	rtnl_lock();
 	if (sk == mroute_socket) {
-		ipv4_devconf.mc_forwarding--;
+		ve_ipv4_devconf.mc_forwarding--;
 
 		write_lock_bh(&mrt_lock);
 		mroute_socket=NULL;
@@ -885,7 +885,7 @@ int ip_mroute_setsockopt(struct sock *sk
 				mroute_socket=sk;
 				write_unlock_bh(&mrt_lock);
 
-				ipv4_devconf.mc_forwarding++;
+				ve_ipv4_devconf.mc_forwarding++;
 			}
 			rtnl_unlock();
 			return ret;
diff -Nurap linux-2.6.9-100.orig/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.9-ve023stab054/net/ipv4/ipvs/ip_vs_conn.c
--- linux-2.6.9-100.orig/net/ipv4/ipvs/ip_vs_conn.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/ipvs/ip_vs_conn.c	2011-06-15 19:26:19.000000000 +0400
@@ -861,7 +861,8 @@ int ip_vs_conn_init(void)
 	/* Allocate ip_vs_conn slab cache */
 	ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
 					      sizeof(struct ip_vs_conn), 0,
-					      SLAB_HWCACHE_ALIGN, NULL, NULL);
+					      SLAB_HWCACHE_ALIGN | SLAB_UBC,
+					      NULL, NULL);
 	if (!ip_vs_conn_cachep) {
 		vfree(ip_vs_conn_tab);
 		return -ENOMEM;
diff -Nurap linux-2.6.9-100.orig/net/ipv4/ipvs/ip_vs_core.c linux-2.6.9-ve023stab054/net/ipv4/ipvs/ip_vs_core.c
--- linux-2.6.9-100.orig/net/ipv4/ipvs/ip_vs_core.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/ipvs/ip_vs_core.c	2011-06-15 19:26:19.000000000 +0400
@@ -980,6 +980,10 @@ ip_vs_in(unsigned int hooknum, struct sk
 	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)
 	 *	... don't know why 1st test DOES NOT include 2nd (?)
 	 */
+	/*
+	 * VZ: the question above is right.
+	 * The second test is superfluous.
+	 */
 	if (unlikely(skb->pkt_type != PACKET_HOST
 		     || skb->dev == &loopback_dev || skb->sk)) {
 		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_core.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_core.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_core.c	2011-06-15 19:26:21.000000000 +0400
@@ -48,6 +48,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/listhelp.h>
+#include <ub/ub_mem.h>
 
 #define IP_CONNTRACK_VERSION	"2.1"
 
@@ -64,10 +65,25 @@ DECLARE_RWLOCK(ip_conntrack_expect_tuple
 atomic_t ip_conntrack_count = ATOMIC_INIT(0);
 EXPORT_SYMBOL(ip_conntrack_count);
 
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_conntrack_helpers \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_helpers)
+#define ve_ip_conntrack_max \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_max)
+#define ve_ip_conntrack_count \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_count)
+#else
 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
 LIST_HEAD(ip_conntrack_expect_list);
 struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 static LIST_HEAD(helpers);
+struct list_head *ip_conntrack_hash;
+#define ve_ip_conntrack_count 		ip_conntrack_count
+#define ve_ip_conntrack_helpers		helpers
+#define ve_ip_conntrack_max 		ip_conntrack_max
+#define ve_ip_conntrack_unconfirmed 	unconfirmed
+#endif
+
 unsigned int ip_conntrack_htable_size = 0;
 int ip_conntrack_max;
 struct list_head *ip_conntrack_hash;
@@ -143,6 +159,28 @@ ip_ct_get_tuplepr(const struct sk_buff *
 	return ip_ct_get_tuple(iph, skb, dataoff, tuple, proto);
 }
 
+#ifdef CONFIG_VE_IPTABLES
+/* this function gives us an ability to safely restore
+ * connection in case of failure */
+void ip_conntrack_hash_insert(struct ip_conntrack *ct)
+{
+	u_int32_t hash, repl_hash;
+
+	if (!ip_conntrack_hash_rnd_initted) {
+		get_random_bytes(&ip_conntrack_hash_rnd, 4);
+		ip_conntrack_hash_rnd_initted = 1;
+	}
+
+        hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+        repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+        list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
+                 &ve_ip_conntrack_hash[hash]);
+        list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
+                 &ve_ip_conntrack_hash[repl_hash]);
+}
+EXPORT_SYMBOL(ip_conntrack_hash_insert);
+#endif
+
 int
 ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
 		   const struct ip_conntrack_tuple *orig,
@@ -192,7 +230,7 @@ __ip_ct_expect_find(const struct ip_conn
 {
 	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
 	MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
-	return LIST_FIND(&ip_conntrack_expect_list, expect_cmp, 
+	return LIST_FIND(&ve_ip_conntrack_expect_list, expect_cmp, 
 			 struct ip_conntrack_expect *, tuple);
 }
 
@@ -275,7 +313,7 @@ static void remove_expectations(struct i
 			continue;
 		}
 
-		IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
+		IP_NF_ASSERT(list_inlist(&ve_ip_conntrack_expect_list, exp));
 		IP_NF_ASSERT(exp->expectant == ct);
 
 		/* delete expectation from global and private lists */
@@ -293,8 +331,10 @@ clean_from_lists(struct ip_conntrack *ct
 
 	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-	LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-	LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+	LIST_DELETE(&VE_OWNER_CT(ct)->_ip_conntrack->_ip_conntrack_hash[ho],
+ 		    &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+	LIST_DELETE(&VE_OWNER_CT(ct)->_ip_conntrack->_ip_conntrack_hash[hr],
+ 		    &ct->tuplehash[IP_CT_DIR_REPLY]);
 
 	/* Destroy all un-established, pending expectations */
 	remove_expectations(ct, 1);
@@ -305,6 +345,11 @@ destroy_conntrack(struct nf_conntrack *n
 {
 	struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL;
 	struct ip_conntrack_protocol *proto;
+#ifdef CONFIG_VE_IPTABLES
+	struct ve_struct *old;
+
+	old = set_exec_env(VE_OWNER_CT(ct));
+#endif
 
 	DEBUGP("destroy_conntrack(%p)\n", ct);
 	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
@@ -317,8 +362,8 @@ destroy_conntrack(struct nf_conntrack *n
 	if (proto && proto->destroy)
 		proto->destroy(ct);
 
-	if (ip_conntrack_destroyed)
-		ip_conntrack_destroyed(ct);
+	if (ve_ip_conntrack_destroyed)
+		ve_ip_conntrack_destroyed(ct);
 
 	WRITE_LOCK(&ip_conntrack_lock);
 	/* Make sure don't leave any orphaned expectations lying around */
@@ -342,8 +387,11 @@ destroy_conntrack(struct nf_conntrack *n
 
 	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
 	kmem_cache_free(ip_conntrack_cachep, ct);
-	atomic_dec(&ip_conntrack_count);
+	atomic_dec(&ve_ip_conntrack_count);
 	CONNTRACK_STAT_INC(delete);
+#ifdef CONFIG_VE_IPTABLES
+	(void)set_exec_env(old);
+#endif
 }
 
 static void death_by_timeout(unsigned long ul_conntrack)
@@ -378,7 +426,7 @@ __ip_conntrack_find(const struct ip_conn
 	unsigned int cpu = smp_processor_id();
 
 	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
-	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
+	list_for_each_entry(h, &ve_ip_conntrack_hash[hash], list) {
 		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
 			per_cpu(ip_conntrack_stat, cpu).found++;
 			return h;
@@ -439,17 +487,17 @@ __ip_conntrack_confirm(struct sk_buff *s
 	/* See if there's one in the list already, including reverse:
            NAT could have grabbed it without realizing, since we're
            not in the hash.  If there is, we lost race. */
-	if (!LIST_FIND(&ip_conntrack_hash[hash],
+	if (!LIST_FIND(&ve_ip_conntrack_hash[hash],
 		       conntrack_tuple_cmp,
 		       struct ip_conntrack_tuple_hash *,
 		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
-	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
+	    && !LIST_FIND(&ve_ip_conntrack_hash[repl_hash],
 			  conntrack_tuple_cmp,
 			  struct ip_conntrack_tuple_hash *,
 			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
-		list_prepend(&ip_conntrack_hash[hash],
+		list_prepend(&ve_ip_conntrack_hash[hash],
 			     &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-		list_prepend(&ip_conntrack_hash[repl_hash],
+		list_prepend(&ve_ip_conntrack_hash[repl_hash],
 			     &ct->tuplehash[IP_CT_DIR_REPLY]);
 		/* Timer relative to confirmation time, not original
 		   setting time, otherwise we'd get timer wrap in
@@ -522,7 +570,7 @@ static inline int helper_cmp(const struc
 
 struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
 {
-	return LIST_FIND(&helpers, helper_cmp,
+	return LIST_FIND(&ve_ip_conntrack_helpers, helper_cmp,
 			 struct ip_conntrack_helper *,
 			 tuple);
 }
@@ -544,6 +592,37 @@ struct ip_conntrack_protocol *ip_ct_find
 	return p;
 }
 
+struct ip_conntrack *
+ip_conntrack_alloc(struct user_beancounter *ub)
+{
+	struct ip_conntrack *conntrack;
+	struct user_beancounter *old_ub;
+
+	old_ub = set_exec_ub(ub);
+	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
+	(void)set_exec_ub(old_ub);
+	if (!conntrack) {
+		DEBUGP("Can't allocate conntrack.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(conntrack, 0, sizeof(*conntrack));
+	atomic_set(&conntrack->ct_general.use, 1);
+	conntrack->ct_general.destroy = destroy_conntrack;
+
+	/* Don't set timer yet: wait for confirmation */
+	init_timer(&conntrack->timeout);
+	conntrack->timeout.data = (unsigned long)conntrack;
+	conntrack->timeout.function = death_by_timeout;
+#ifdef CONFIG_VE_IPTABLES
+	SET_VE_OWNER_CT(conntrack, get_exec_env());
+#endif
+
+	INIT_LIST_HEAD(&conntrack->sibling_list);
+	return conntrack;
+}
+EXPORT_SYMBOL(ip_conntrack_alloc);
+
 /* Allocate a new conntrack: we return -ENOMEM if classification
    failed due to stress.  Otherwise it really is unclassifiable. */
 static struct ip_conntrack_tuple_hash *
@@ -555,6 +634,7 @@ init_conntrack(const struct ip_conntrack
 	struct ip_conntrack_tuple repl_tuple;
 	size_t hash;
 	struct ip_conntrack_expect *expected;
+	struct user_beancounter *ub;
 
 	if (!ip_conntrack_hash_rnd_initted) {
 		get_random_bytes(&ip_conntrack_hash_rnd, 4);
@@ -563,14 +643,14 @@ init_conntrack(const struct ip_conntrack
 
 	hash = hash_conntrack(tuple);
 
-	if (ip_conntrack_max
-	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+	if (ve_ip_conntrack_max
+	    && atomic_read(&ve_ip_conntrack_count) >= ve_ip_conntrack_max) {
 		/* Try dropping from this hash chain. */
-		if (!early_drop(&ip_conntrack_hash[hash])) {
+		if (!early_drop(&ve_ip_conntrack_hash[hash])) {
 			if (net_ratelimit())
-				printk(KERN_WARNING
-				       "ip_conntrack: table full, dropping"
-				       " packet.\n");
+				ve_printk(VE_LOG_BOTH, KERN_WARNING
+				       "ip_conntrack: VPS %d: table full, dropping"
+				       " packet.\n", VEID(get_exec_env()));
 			return ERR_PTR(-ENOMEM);
 		}
 	}
@@ -580,15 +660,19 @@ init_conntrack(const struct ip_conntrack
 		return NULL;
 	}
 
-	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
-	if (!conntrack) {
-		DEBUGP("Can't allocate conntrack.\n");
-		return ERR_PTR(-ENOMEM);
-	}
+#ifdef CONFIG_USER_RESOURCE
+	if (skb->dev != NULL)  /* received skb */
+		ub = netdev_bc(skb->dev)->exec_ub;
+	else if (skb->sk != NULL) /* sent skb */
+		ub = sock_bc(skb->sk)->ub;
+	else
+#endif
+		ub = NULL;
+
+	conntrack = ip_conntrack_alloc(ub);
+	if (conntrack == NULL || IS_ERR(conntrack))
+		return (struct ip_conntrack_tuple_hash *)conntrack;
 
-	memset(conntrack, 0, sizeof(*conntrack));
-	atomic_set(&conntrack->ct_general.use, 1);
-	conntrack->ct_general.destroy = destroy_conntrack;
 	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
 	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
 	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
@@ -597,17 +681,11 @@ init_conntrack(const struct ip_conntrack
 		kmem_cache_free(ip_conntrack_cachep, conntrack);
 		return NULL;
 	}
-	/* Don't set timer yet: wait for confirmation */
-	init_timer(&conntrack->timeout);
-	conntrack->timeout.data = (unsigned long)conntrack;
-	conntrack->timeout.function = death_by_timeout;
-
-	INIT_LIST_HEAD(&conntrack->sibling_list);
 
 	WRITE_LOCK(&ip_conntrack_lock);
 	/* Need finding and deleting of expected ONLY if we win race */
 	READ_LOCK(&ip_conntrack_expect_tuple_lock);
-	expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
+	expected = LIST_FIND(&ve_ip_conntrack_expect_list, expect_cmp,
 			     struct ip_conntrack_expect *, tuple);
 	READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
 
@@ -634,12 +712,12 @@ init_conntrack(const struct ip_conntrack
 		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
 		conntrack->master = expected;
 		expected->sibling = conntrack;
-		LIST_DELETE(&ip_conntrack_expect_list, expected);
+		LIST_DELETE(&ve_ip_conntrack_expect_list, expected);
 		expected->expectant->expecting--;
 		nf_conntrack_get(&master_ct(conntrack)->ct_general);
 
 		/* this is a braindead... --pablo */
-		atomic_inc(&ip_conntrack_count);
+		atomic_inc(&ve_ip_conntrack_count);
 		WRITE_UNLOCK(&ip_conntrack_lock);
 
 		if (expected->expectfn)
@@ -654,7 +732,7 @@ init_conntrack(const struct ip_conntrack
 		CONNTRACK_STAT_INC(new);
 	}
 
-end:	atomic_inc(&ip_conntrack_count);
+end:	atomic_inc(&ve_ip_conntrack_count);
 	WRITE_UNLOCK(&ip_conntrack_lock);
 
 ret:	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
@@ -884,7 +962,7 @@ ip_conntrack_expect_alloc(void)
 	return new;
 }
 
-static void
+void
 ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
 			   struct ip_conntrack *related_to)
 {
@@ -895,7 +973,7 @@ ip_conntrack_expect_insert(struct ip_con
 	/* add to expected list for this connection */
 	list_add_tail(&new->expected_list, &related_to->sibling_list);
 	/* add to global list of expectations */
-	list_prepend(&ip_conntrack_expect_list, &new->list);
+	list_prepend(&ve_ip_conntrack_expect_list, &new->list);
 	/* add and start timer if required */
 	if (related_to->helper->timeout) {
 		init_timer(&new->timeout);
@@ -907,6 +985,7 @@ ip_conntrack_expect_insert(struct ip_con
 	}
 	related_to->expecting++;
 }
+EXPORT_SYMBOL(ip_conntrack_expect_insert);
 
 /* Add a related connection. */
 int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
@@ -923,7 +1002,7 @@ int ip_conntrack_expect_related(struct i
 	DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
 	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);
 
-	old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
+	old = LIST_FIND(&ve_ip_conntrack_expect_list, resent_expect,
 		        struct ip_conntrack_expect *, &expect->tuple, 
 			&expect->mask);
 	if (old) {
@@ -990,7 +1069,7 @@ int ip_conntrack_expect_related(struct i
 		 */
 		unexpect_related(old);
 		ret = -EPERM;
-	} else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
+	} else if (LIST_FIND(&ve_ip_conntrack_expect_list, expect_clash,
 			     struct ip_conntrack_expect *, &expect->tuple, 
 			     &expect->mask)) {
 		WRITE_UNLOCK(&ip_conntrack_lock);
@@ -1026,7 +1105,7 @@ int ip_conntrack_change_expect(struct ip
 		/* Never seen before */
 		DEBUGP("change expect: never seen before\n");
 		if (!ip_ct_tuple_equal(&expect->tuple, newtuple) 
-		    && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
+		    && LIST_FIND(&ve_ip_conntrack_expect_list, expect_clash,
 			         struct ip_conntrack_expect *, newtuple, &expect->mask)) {
 			/* Force NAT to find an unused tuple */
 			ret = -1;
@@ -1076,8 +1155,22 @@ int ip_conntrack_alter_reply(struct ip_c
 
 int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
 {
+	if (!ve_is_super(get_exec_env())) {
+		struct ip_conntrack_helper *tmp;
+		struct module *mod = me->me;
+
+		__module_get(mod);
+		tmp = kmalloc(sizeof(struct ip_conntrack_helper), GFP_KERNEL);
+		if (!tmp) {
+			module_put(mod);
+			return -ENOMEM;
+		}
+		memcpy(tmp, me, sizeof(struct ip_conntrack_helper));
+		me = tmp;
+	}
+
 	WRITE_LOCK(&ip_conntrack_lock);
-	list_prepend(&helpers, me);
+	list_prepend(&ve_ip_conntrack_helpers, me);
 	WRITE_UNLOCK(&ip_conntrack_lock);
 
 	return 0;
@@ -1098,19 +1191,38 @@ static inline int unhelp(struct ip_connt
 void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
 {
 	unsigned int i;
+	struct ip_conntrack_helper *h;
+
+	if (!ve_is_super(get_exec_env())) {
+		READ_LOCK(&ip_conntrack_lock);
+		list_for_each_entry(h, &ve_ip_conntrack_helpers, list) {
+			if (h->name == me->name) {
+				me = h;
+				break;
+			}
+		}
+		READ_UNLOCK(&ip_conntrack_lock);
+		if (me != h)
+			return;
+	}
 
 	/* Need write lock here, to delete helper. */
 	WRITE_LOCK(&ip_conntrack_lock);
-	LIST_DELETE(&helpers, me);
+	LIST_DELETE(&ve_ip_conntrack_helpers, me);
 
 	/* Get rid of expecteds, set helpers to NULL. */
 	for (i = 0; i < ip_conntrack_htable_size; i++)
-		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
+		LIST_FIND_W(&ve_ip_conntrack_hash[i], unhelp,
 			    struct ip_conntrack_tuple_hash *, me);
 	WRITE_UNLOCK(&ip_conntrack_lock);
 
 	/* Someone could be still looking at the helper in a bh. */
 	synchronize_net();
+
+	if (!ve_is_super(get_exec_env())) {
+		module_put(me->me);
+		kfree(me);
+	}
 }
 
 static inline void ct_add_counters(struct ip_conntrack *ct,
@@ -1225,7 +1337,7 @@ get_next_corpse(int (*kill)(const struct
 
 	READ_LOCK(&ip_conntrack_lock);
 	for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) {
-		h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill,
+		h = LIST_FIND(&ve_ip_conntrack_hash[*bucket], do_kill,
 			      struct ip_conntrack_tuple_hash *, kill, data);
 	}
 	if (h)
@@ -1263,6 +1375,11 @@ getorigdst(struct sock *sk, int optval, 
 	struct ip_conntrack_tuple_hash *h;
 	struct ip_conntrack_tuple tuple;
 	
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_ip_conntrack)
+		return -ENOPROTOOPT;
+#endif
+
 	IP_CT_TUPLE_U_BLANK(&tuple);
 	tuple.src.ip = inet->rcv_saddr;
 	tuple.src.u.tcp.port = inet->sport;
@@ -1320,11 +1437,17 @@ static int kill_all(const struct ip_conn
 	return 1;
 }
 
+static void ip_conntrack_cache_free(void)
+{
+	kmem_cache_destroy(ip_conntrack_expect_cachep);
+	kmem_cache_destroy(ip_conntrack_cachep);
+	nf_unregister_sockopt(&so_getorigdst);
+}
+
 /* Mishearing the voices in his head, our hero wonders how he's
    supposed to kill the mall. */
 void ip_conntrack_cleanup(void)
 {
-	ip_ct_attach = NULL;
 	/* This makes sure all current packets have passed through
            netfilter framework.  Roll on, two-stage module
            delete... */
@@ -1332,23 +1455,32 @@ void ip_conntrack_cleanup(void)
  
  i_see_dead_people:
 	ip_ct_selective_cleanup(kill_all, NULL);
-	if (atomic_read(&ip_conntrack_count) != 0) {
+	if (atomic_read(&ve_ip_conntrack_count) != 0) {
 		schedule();
 		goto i_see_dead_people;
 	}
 
-	kmem_cache_destroy(ip_conntrack_cachep);
-	kmem_cache_destroy(ip_conntrack_expect_cachep);
-	vfree(ip_conntrack_hash);
-	nf_unregister_sockopt(&so_getorigdst);
+	if (ve_is_super(get_exec_env())) {
+		ip_ct_attach = NULL;
+		ip_conntrack_cache_free();
+	}
+	vfree(ve_ip_conntrack_hash);
+	ve_ip_conntrack_hash = NULL;
+	INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
+	INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
+#ifdef CONFIG_VE_IPTABLES
+	kfree(ve_ip_ct_protos);
+	ve_ip_ct_protos = NULL;
+	kfree(get_exec_env()->_ip_conntrack);
+	get_exec_env()->_ip_conntrack = NULL;
+#endif
 }
 
 static int hashsize;
 module_param(hashsize, int, 0400);
 
-int __init ip_conntrack_init(void)
+static int ip_conntrack_cache_create(void)
 {
-	unsigned int i;
 	int ret;
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
@@ -1364,72 +1496,130 @@ int __init ip_conntrack_init(void)
 		if (ip_conntrack_htable_size < 16)
 			ip_conntrack_htable_size = 16;
 	}
-	ip_conntrack_max = 8 * ip_conntrack_htable_size;
+	ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
 
 	printk("ip_conntrack version %s (%u buckets, %d max)"
 	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
-	       ip_conntrack_htable_size, ip_conntrack_max,
+	       ip_conntrack_htable_size, ve_ip_conntrack_max,
 	       sizeof(struct ip_conntrack));
 
 	ret = nf_register_sockopt_owner(&so_getorigdst, THIS_MODULE);
 	if (ret != 0) {
 		printk(KERN_ERR "Unable to register netfilter socket option\n");
-		return ret;
-	}
-
-	ip_conntrack_hash = vmalloc(sizeof(struct list_head)
-				    * ip_conntrack_htable_size);
-	if (!ip_conntrack_hash) {
-		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
-		goto err_unreg_sockopt;
+		goto out_sockopt;
 	}
 
+	ret = -ENOMEM;
 	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
 	                                        sizeof(struct ip_conntrack), 0,
-	                                        SLAB_HWCACHE_ALIGN, NULL, NULL);
+	                                        SLAB_HWCACHE_ALIGN | SLAB_UBC,
+						NULL, NULL);
 	if (!ip_conntrack_cachep) {
 		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
-		goto err_free_hash;
+		goto err_unreg_sockopt;
 	}
 
 	ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
 					sizeof(struct ip_conntrack_expect),
-					0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+					0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
+					NULL, NULL);
 	if (!ip_conntrack_expect_cachep) {
 		printk(KERN_ERR "Unable to create ip_expect slab cache\n");
 		goto err_free_conntrack_slab;
 	}
 
+	return 0;
+
+err_free_conntrack_slab:
+	kmem_cache_destroy(ip_conntrack_cachep);
+err_unreg_sockopt:
+	nf_unregister_sockopt(&so_getorigdst);
+out_sockopt:
+	return ret;
+}
+
+int ip_conntrack_init(void)
+{
+	struct ve_struct *env;
+	unsigned int i;
+	int ret;
+
+	env = get_exec_env();
+#ifdef CONFIG_VE_IPTABLES
+	ret = -ENOMEM;
+	env->_ip_conntrack =
+		kmalloc(sizeof(struct ve_ip_conntrack), GFP_KERNEL);
+	if (!env->_ip_conntrack)
+		goto out;
+	memset(env->_ip_conntrack, 0, sizeof(struct ve_ip_conntrack));
+	if (ve_is_super(env)) {
+		ret = ip_conntrack_cache_create();
+		if (ret)
+			goto cache_fail;
+	} else
+		ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
+#else /* CONFIG_VE_IPTABLES */
+	ret = ip_conntrack_cache_create();
+	if (ret)
+		goto out;
+#endif
+
+	ret = -ENOMEM;
+	ve_ip_conntrack_hash = vmalloc(sizeof(struct list_head)
+				    * ip_conntrack_htable_size);
+	if (!ve_ip_conntrack_hash) {
+		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
+		goto err_free_cache;
+	}
+
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_ct_protos = (struct ip_conntrack_protocol **)
+		ub_kmalloc(sizeof(void *)*MAX_IP_CT_PROTO, GFP_KERNEL);
+	if (!ve_ip_ct_protos)
+		goto err_free_hash;
+#endif
 	/* Don't NEED lock here, but good form anyway. */
 	WRITE_LOCK(&ip_conntrack_lock);
 	for (i = 0; i < MAX_IP_CT_PROTO; i++)
-		ip_ct_protos[i] = &ip_conntrack_generic_protocol;
+		ve_ip_ct_protos[i] = &ip_conntrack_generic_protocol;
 	/* Sew in builtin protocols. */
-	ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
-	ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
-	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
+	ve_ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
+	ve_ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
+	ve_ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
 	WRITE_UNLOCK(&ip_conntrack_lock);
 
 	for (i = 0; i < ip_conntrack_htable_size; i++)
-		INIT_LIST_HEAD(&ip_conntrack_hash[i]);
+		INIT_LIST_HEAD(&ve_ip_conntrack_hash[i]);
 
-	/* For use by ipt_REJECT */
-	ip_ct_attach = ip_conntrack_attach;
+	INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
+	INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
 
-	/* Set up fake conntrack:
-	    - to never be deleted, not in any hashes */
-	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
-	/*  - and look it like as a confirmed connection */
-	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
+	if (ve_is_super(env)) {
+		/* For use by ipt_REJECT */
+		ip_ct_attach = ip_conntrack_attach;
+
+		/* Set up fake conntrack:
+		   - to never be deleted, not in any hashes */
+		atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
+		/*  - and look it like as a confirmed connection */
+		set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
+	}
 
-	return ret;
+	return 0;
 
-err_free_conntrack_slab:
-	kmem_cache_destroy(ip_conntrack_cachep);
+#ifdef CONFIG_VE_IPTABLES
 err_free_hash:
-	vfree(ip_conntrack_hash);
-err_unreg_sockopt:
-	nf_unregister_sockopt(&so_getorigdst);
-
-	return -ENOMEM;
+#endif
+	vfree(ve_ip_conntrack_hash);
+	ve_ip_conntrack_hash = NULL;
+err_free_cache:
+	if (ve_is_super(env))
+		ip_conntrack_cache_free();
+#ifdef CONFIG_VE_IPTABLES
+cache_fail:
+	kfree(env->_ip_conntrack);
+	env->_ip_conntrack = NULL;
+#endif
+out:
+	return ret;
 }
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_ftp.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_ftp.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_ftp.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_ftp.c	2011-06-15 19:26:19.000000000 +0400
@@ -15,6 +15,7 @@
 #include <linux/ctype.h>
 #include <net/checksum.h>
 #include <net/tcp.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/lockhelp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
@@ -411,10 +412,10 @@ static int help(struct sk_buff *skb,
 static struct ip_conntrack_helper ftp[MAX_PORTS];
 static char ftp_names[MAX_PORTS][10];
 
-/* Not __exit: called from init() */
-static void fini(void)
+void fini_iptable_ftp(void)
 {
 	int i;
+
 	for (i = 0; i < ports_c; i++) {
 		DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
 				ports[i]);
@@ -422,6 +423,31 @@ static void fini(void)
 	}
 }
 
+int init_iptable_ftp(void)
+{
+	int i, ret;
+
+	for (i = 0; i < ports_c; i++) {
+		DEBUGP("ip_ct_ftp: registering helper for port %d\n",
+				ports[i]);
+		ret = ip_conntrack_helper_register(&ftp[i]);
+		if (ret) {
+			fini_iptable_ftp();
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/* Not __exit: called from init() */
+static void fini(void)
+{
+	KSYMMODUNRESOLVE(ip_conntrack_ftp);
+	KSYMUNRESOLVE(init_iptable_ftp);
+	KSYMUNRESOLVE(fini_iptable_ftp);
+	fini_iptable_ftp();
+}
+
 static int __init init(void)
 {
 	int i, ret;
@@ -447,16 +473,15 @@ static int __init init(void)
 		else
 			sprintf(tmpname, "ftp-%d", ports[i]);
 		ftp[i].name = tmpname;
-
-		DEBUGP("ip_ct_ftp: registering helper for port %d\n", 
-				ports[i]);
-		ret = ip_conntrack_helper_register(&ftp[i]);
-
-		if (ret) {
-			fini();
-			return ret;
-		}
 	}
+
+	ret = init_iptable_ftp();
+	if (ret)
+		return ret;
+
+	KSYMRESOLVE(init_iptable_ftp);
+	KSYMRESOLVE(fini_iptable_ftp);
+	KSYMMODRESOLVE(ip_conntrack_ftp);
 	return 0;
 }
 
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_irc.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_irc.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_irc.c	2004-10-19 01:54:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_irc.c	2011-06-15 19:26:19.000000000 +0400
@@ -28,6 +28,7 @@
 #include <linux/ip.h>
 #include <net/checksum.h>
 #include <net/tcp.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/lockhelp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
@@ -239,7 +240,33 @@ static int help(struct sk_buff *skb,
 static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
 static char irc_names[MAX_PORTS][10];
 
-static void fini(void);
+void fini_iptable_irc(void)
+{
+	int i;
+
+	for (i = 0; i < ports_c; i++) {
+		DEBUGP("unregistering port %d\n",
+				ports[i]);
+		ip_conntrack_helper_unregister(&irc_helpers[i]);
+	}
+}
+
+int init_iptable_irc(void)
+{
+	int i, ret;
+
+	for (i = 0; i < ports_c; i++) {
+		DEBUGP("port #%d: %d\n", i, ports[i]);
+		ret = ip_conntrack_helper_register(&irc_helpers[i]);
+		if (ret) {
+			printk("ip_conntrack_irc: ERROR registering port %d\n",
+				ports[i]);
+			fini_iptable_irc();
+			return ret;
+		}
+	}
+	return 0;
+}
 
 static int __init init(void)
 {
@@ -278,18 +305,15 @@ static int __init init(void)
 		else
 			sprintf(tmpname, "irc-%d", i);
 		hlpr->name = tmpname;
-
-		DEBUGP("port #%d: %d\n", i, ports[i]);
-
-		ret = ip_conntrack_helper_register(hlpr);
-
-		if (ret) {
-			printk("ip_conntrack_irc: ERROR registering port %d\n",
-				ports[i]);
-			fini();
-			return -EBUSY;
-		}
 	}
+
+	ret = init_iptable_irc();
+	if (ret)
+		return ret;
+
+	KSYMRESOLVE(init_iptable_irc);
+	KSYMRESOLVE(fini_iptable_irc);
+	KSYMMODRESOLVE(ip_conntrack_irc);
 	return 0;
 }
 
@@ -297,12 +321,10 @@ static int __init init(void)
  * it is needed by the init function */
 static void fini(void)
 {
-	int i;
-	for (i = 0; i < ports_c; i++) {
-		DEBUGP("unregistering port %d\n",
-		       ports[i]);
-		ip_conntrack_helper_unregister(&irc_helpers[i]);
-	}
+	KSYMMODUNRESOLVE(ip_conntrack_irc);
+	KSYMUNRESOLVE(init_iptable_irc);
+	KSYMUNRESOLVE(fini_iptable_irc);
+	fini_iptable_irc();
 }
 
 PROVIDES_CONNTRACK(irc);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_proto_generic.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c	2004-10-19 01:53:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_proto_generic.c	2011-06-15 19:26:19.000000000 +0400
@@ -52,7 +52,7 @@ static int packet(struct ip_conntrack *c
 		  const struct sk_buff *skb,
 		  enum ip_conntrack_info ctinfo)
 {
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
+	ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_generic_timeout);
 	return NF_ACCEPT;
 }
 
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c	2004-10-19 01:53:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_proto_icmp.c	2011-06-15 19:26:19.000000000 +0400
@@ -102,7 +102,7 @@ static int icmp_packet(struct ip_conntra
 			ct->timeout.function((unsigned long)ct);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
-		ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
+		ip_ct_refresh_acct(ct, ctinfo, skb, ve_ip_ct_icmp_timeout);
 	}
 
 	return NF_ACCEPT;
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2011-06-15 19:26:19.000000000 +0400
@@ -100,7 +100,7 @@ unsigned long ip_ct_tcp_timeout_close = 
    to ~13-30min depending on RTO. */
 unsigned long ip_ct_tcp_timeout_max_retrans =     5 MINS;
  
-static unsigned long * tcp_timeouts[]
+unsigned long * tcp_timeouts[]
 = { NULL,                              /*      TCP_CONNTRACK_NONE */
     &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
     &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
@@ -699,7 +699,7 @@ static int tcp_in_window(struct ip_ct_tc
 
 			: "SEQ is over the upper bound (over the window of the receiver)");
 
-		res = ip_ct_tcp_be_liberal && !tcph->rst;
+		res = ve_ip_ct_tcp_be_liberal && !tcph->rst;
   	}
   
 	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
@@ -947,9 +947,11 @@ in_window:
 		old_state, new_state);
 
 	conntrack->proto.tcp.state = new_state;
-	timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
-		  && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
-		  ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+	timeout = conntrack->proto.tcp.retrans >= ve_ip_ct_tcp_max_retrans &&
+		ve_ip_ct_tcp_timeouts[new_state] >
+		  			ve_ip_ct_tcp_timeout_max_retrans
+		  ? ve_ip_ct_tcp_timeout_max_retrans :
+		  			ve_ip_ct_tcp_timeouts[new_state];
 	WRITE_UNLOCK(&tcp_lock);
 
 	if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
@@ -1019,7 +1021,7 @@ static int tcp_new(struct ip_conntrack *
 		conntrack->proto.tcp.seen[1].flags = 0;
 		conntrack->proto.tcp.seen[0].loose = 
 		conntrack->proto.tcp.seen[1].loose = 0;
-	} else if (ip_ct_tcp_loose == 0) {
+	} else if (ve_ip_ct_tcp_loose == 0) {
 		/* Don't try to pick up connections. */
 		return 0;
 	} else {
@@ -1043,7 +1045,7 @@ static int tcp_new(struct ip_conntrack *
 		conntrack->proto.tcp.seen[0].flags =
 		conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
 		conntrack->proto.tcp.seen[0].loose = 
-		conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose;
+		conntrack->proto.tcp.seen[1].loose = ve_ip_ct_tcp_loose;
 	}
     
 	conntrack->proto.tcp.seen[1].td_end = 0;
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_proto_udp.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c	2004-10-19 01:53:05.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_proto_udp.c	2011-06-15 19:26:19.000000000 +0400
@@ -71,11 +71,11 @@ static int udp_packet(struct ip_conntrac
 	   stream.  Extend timeout. */
 	if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
 		ip_ct_refresh_acct(conntrack, ctinfo, skb, 
-				   ip_ct_udp_timeout_stream);
+				   ve_ip_ct_udp_timeout_stream);
 		/* Also, more likely to be important, and not a probe */
 		set_bit(IPS_ASSURED_BIT, &conntrack->status);
 	} else
-		ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
+		ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_udp_timeout);
 
 	return NF_ACCEPT;
 }
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_standalone.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_standalone.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_conntrack_standalone.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_conntrack_standalone.c	2011-06-15 19:26:20.000000000 +0400
@@ -27,6 +27,8 @@
 #endif
 #include <net/checksum.h>
 #include <net/ip.h>
+#include <linux/nfcalls.h>
+#include <linux/stringify.h>
 
 #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
 #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
@@ -45,9 +47,31 @@
 
 MODULE_LICENSE("GPL");
 
+int ip_conntrack_disable_ve0 = 0;
+module_param(ip_conntrack_disable_ve0, int, 0440);
+
 extern atomic_t ip_conntrack_count;
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_conntrack_count \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_count)
+#else
+#define ve_ip_conntrack_count	ip_conntrack_count
+#endif
 DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
 
+/* Prior to 2.6.15, we had a ip_conntrack_enable_ve0 param. */
+static int warn_set(const char *val, struct kernel_param *kp)
+{
+	printk(KERN_INFO __stringify(KBUILD_MODNAME)
+	       " : parameter ip_conntrack_enable_ve0 is obsoleted. In kernel"
+	       " >= 2.6.15 connection tracking on hardware node is enabled by "
+	       "default, use ip_conntrack_disable_ve0=1 parameter to "
+	       "disable.\n");
+	return 0;
+}
+module_param_call(ip_conntrack_enable_ve0, warn_set, NULL, NULL, 0);
+
 static int kill_proto(const struct ip_conntrack *i, void *data)
 {
 	return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == 
@@ -87,8 +111,8 @@ static struct list_head *ct_get_first(st
 	for (st->bucket = 0;
 	     st->bucket < ip_conntrack_htable_size;
 	     st->bucket++) {
-		if (!list_empty(&ip_conntrack_hash[st->bucket]))
-			return ip_conntrack_hash[st->bucket].next;
+		if (!list_empty(&ve_ip_conntrack_hash[st->bucket]))
+			return ve_ip_conntrack_hash[st->bucket].next;
 	}
 	return NULL;
 }
@@ -98,10 +122,10 @@ static struct list_head *ct_get_next(str
 	struct ct_iter_state *st = seq->private;
 
 	head = head->next;
-	while (head == &ip_conntrack_hash[st->bucket]) {
+	while (head == &ve_ip_conntrack_hash[st->bucket]) {
 		if (++st->bucket >= ip_conntrack_htable_size)
 			return NULL;
-		head = ip_conntrack_hash[st->bucket].next;
+		head = ve_ip_conntrack_hash[st->bucket].next;
 	}
 	return head;
 }
@@ -228,7 +252,7 @@ static struct file_operations ct_file_op
 /* expects */
 static void *exp_seq_start(struct seq_file *s, loff_t *pos)
 {
-	struct list_head *e = &ip_conntrack_expect_list;
+	struct list_head *e = &ve_ip_conntrack_expect_list;
 	loff_t i;
 
 	/* strange seq_file api calls stop even if we fail,
@@ -241,7 +265,7 @@ static void *exp_seq_start(struct seq_fi
 
 	for (i = 0; i <= *pos; i++) {
 		e = e->next;
-		if (e == &ip_conntrack_expect_list)
+		if (e == &ve_ip_conntrack_expect_list)
 			return NULL;
 	}
 	return e;
@@ -253,7 +277,7 @@ static void *exp_seq_next(struct seq_fil
 
 	e = e->next;
 
-	if (e == &ip_conntrack_expect_list)
+	if (e == &ve_ip_conntrack_expect_list)
 		return NULL;
 
 	return e;
@@ -340,7 +364,7 @@ static void ct_cpu_seq_stop(struct seq_f
 
 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 {
-	unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
+	unsigned int nr_conntracks = atomic_read(&ve_ip_conntrack_count);
 	struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
@@ -522,6 +546,28 @@ static struct nf_hook_ops ip_conntrack_l
 
 /* From ip_conntrack_core.c */
 extern int ip_conntrack_max;
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_conntrack_max \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_max)
+#define ve_ip_ct_sysctl_header \
+	(get_exec_env()->_ip_conntrack->_ip_ct_sysctl_header)
+#define ve_ip_ct_net_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_net_table)
+#define ve_ip_ct_ipv4_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_ipv4_table)
+#define ve_ip_ct_netfilter_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_netfilter_table)
+#define ve_ip_ct_sysctl_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_sysctl_table)
+#else
+#define ve_ip_conntrack_max		ip_conntrack_max
+static struct ctl_table_header *ip_ct_sysctl_header;
+#define ve_ip_ct_sysctl_header		ip_ct_sysctl_header
+#define ve_ip_ct_net_table		ip_ct_net_table
+#define ve_ip_ct_ipv4_table		ip_ct_ipv4_table
+#define ve_ip_ct_netfilter_table	ip_ct_netfilter_table
+#define ve_ip_ct_sysctl_table		ip_ct_sysctl_table
+#endif
 extern unsigned int ip_conntrack_htable_size;
 
 /* From ip_conntrack_proto_tcp.c */
@@ -552,8 +598,6 @@ extern unsigned long ip_ct_generic_timeo
 static int log_invalid_proto_min = 0;
 static int log_invalid_proto_max = 255;
 
-static struct ctl_table_header *ip_ct_sysctl_header;
-
 static ctl_table ip_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_NF_CONNTRACK_MAX,
@@ -762,6 +806,112 @@ static ctl_table ip_ct_net_table[] = {
 };
 
 EXPORT_SYMBOL(ip_ct_log_invalid);
+
+#ifdef CONFIG_VE_IPTABLES
+static void ip_conntrack_sysctl_cleanup(void)
+{
+	if (!ve_is_super(get_exec_env())) {
+		kfree(ve_ip_ct_net_table);
+		kfree(ve_ip_ct_ipv4_table);
+		kfree(ve_ip_ct_netfilter_table);
+		kfree(ve_ip_ct_sysctl_table);
+	}
+	ve_ip_ct_net_table = NULL;
+	ve_ip_ct_ipv4_table = NULL;
+	ve_ip_ct_netfilter_table = NULL;
+	ve_ip_ct_sysctl_table = NULL;
+}
+
+#define ALLOC_ENVCTL(field,k,label) \
+		if ( !(field = kmalloc(k*sizeof(ctl_table), GFP_KERNEL)) ) \
+				goto label;
+static int ip_conntrack_sysctl_init(void)
+{
+	int i, ret = 0;
+
+	ret = -ENOMEM;
+	if (ve_is_super(get_exec_env())) {
+		ve_ip_ct_net_table = ip_ct_net_table;
+		ve_ip_ct_ipv4_table = ip_ct_ipv4_table;
+		ve_ip_ct_netfilter_table = ip_ct_netfilter_table;
+		ve_ip_ct_sysctl_table = ip_ct_sysctl_table;
+	} else {
+		/* allocate structures in ve_struct */
+		ALLOC_ENVCTL(ve_ip_ct_net_table, 2, out);
+		ALLOC_ENVCTL(ve_ip_ct_ipv4_table, 2, nomem_1);
+		ALLOC_ENVCTL(ve_ip_ct_netfilter_table, 3, nomem_2);
+		ALLOC_ENVCTL(ve_ip_ct_sysctl_table, 21, nomem_3);
+
+		memcpy(ve_ip_ct_net_table, ip_ct_net_table,
+				2*sizeof(ctl_table));
+		memcpy(ve_ip_ct_ipv4_table, ip_ct_ipv4_table,
+				2*sizeof(ctl_table));
+		memcpy(ve_ip_ct_netfilter_table, ip_ct_netfilter_table,
+				3*sizeof(ctl_table));
+		memcpy(ve_ip_ct_sysctl_table, ip_ct_sysctl_table,
+				21*sizeof(ctl_table));
+
+		ve_ip_ct_net_table[0].child = ve_ip_ct_ipv4_table;
+		ve_ip_ct_ipv4_table[0].child = ve_ip_ct_netfilter_table;
+		ve_ip_ct_netfilter_table[0].child = ve_ip_ct_sysctl_table;
+	}
+	ve_ip_ct_sysctl_table[0].data = &ve_ip_conntrack_max;
+	ve_ip_ct_netfilter_table[1].data = &ve_ip_conntrack_max;
+	ve_ip_ct_sysctl_table[1].data = &ve_ip_conntrack_count;
+	/* skip ve_ip_ct_sysctl_table[2].data as it is read-only and common
+	 * for all environments */
+	ve_ip_ct_tcp_timeouts[1] = ip_ct_tcp_timeout_syn_sent;
+	ve_ip_ct_sysctl_table[3].data = &ve_ip_ct_tcp_timeouts[1];
+	ve_ip_ct_tcp_timeouts[2] = ip_ct_tcp_timeout_syn_recv;
+	ve_ip_ct_sysctl_table[4].data = &ve_ip_ct_tcp_timeouts[2];
+	ve_ip_ct_tcp_timeouts[3] = ip_ct_tcp_timeout_established;
+	ve_ip_ct_sysctl_table[5].data = &ve_ip_ct_tcp_timeouts[3];
+	ve_ip_ct_tcp_timeouts[4] = ip_ct_tcp_timeout_fin_wait;
+	ve_ip_ct_sysctl_table[6].data = &ve_ip_ct_tcp_timeouts[4];
+	ve_ip_ct_tcp_timeouts[5] = ip_ct_tcp_timeout_close_wait;
+	ve_ip_ct_sysctl_table[7].data = &ve_ip_ct_tcp_timeouts[5];
+	ve_ip_ct_tcp_timeouts[6] = ip_ct_tcp_timeout_last_ack;
+	ve_ip_ct_sysctl_table[8].data = &ve_ip_ct_tcp_timeouts[6];
+	ve_ip_ct_tcp_timeouts[7] = ip_ct_tcp_timeout_time_wait;
+	ve_ip_ct_sysctl_table[9].data = &ve_ip_ct_tcp_timeouts[7];
+	ve_ip_ct_tcp_timeouts[8] = ip_ct_tcp_timeout_close;
+	ve_ip_ct_sysctl_table[10].data = &ve_ip_ct_tcp_timeouts[8];
+	ve_ip_ct_udp_timeout = ip_ct_udp_timeout;
+	ve_ip_ct_sysctl_table[11].data = &ve_ip_ct_udp_timeout;
+	ve_ip_ct_udp_timeout_stream = ip_ct_udp_timeout_stream;
+	ve_ip_ct_sysctl_table[12].data = &ve_ip_ct_udp_timeout_stream;
+	ve_ip_ct_icmp_timeout = ip_ct_icmp_timeout;
+	ve_ip_ct_sysctl_table[13].data = &ve_ip_ct_icmp_timeout;
+	ve_ip_ct_generic_timeout = ip_ct_generic_timeout;
+	ve_ip_ct_sysctl_table[14].data = &ve_ip_ct_generic_timeout;
+	ve_ip_ct_log_invalid = ip_ct_log_invalid;
+	ve_ip_ct_sysctl_table[15].data = &ve_ip_ct_log_invalid;
+	ve_ip_ct_tcp_timeout_max_retrans = ip_ct_tcp_timeout_max_retrans;
+	ve_ip_ct_sysctl_table[16].data = &ve_ip_ct_tcp_timeout_max_retrans;
+	ve_ip_ct_tcp_loose = ip_ct_tcp_loose;
+	ve_ip_ct_sysctl_table[17].data = &ve_ip_ct_tcp_loose;
+	ve_ip_ct_tcp_be_liberal = ip_ct_tcp_be_liberal;
+	ve_ip_ct_sysctl_table[18].data = &ve_ip_ct_tcp_be_liberal;
+	ve_ip_ct_tcp_max_retrans = ip_ct_tcp_max_retrans;
+	ve_ip_ct_sysctl_table[19].data = &ve_ip_ct_tcp_max_retrans;
+	for (i = 0; i < 20; i++)
+		ve_ip_ct_sysctl_table[i].owner_env = get_exec_env();
+	ve_ip_ct_netfilter_table[1].owner_env = get_exec_env();
+	return 0;
+
+nomem_3:
+	kfree(ve_ip_ct_netfilter_table);
+	ve_ip_ct_netfilter_table = NULL;
+nomem_2:
+	kfree(ve_ip_ct_ipv4_table);
+	ve_ip_ct_ipv4_table = NULL;
+nomem_1:
+	kfree(ve_ip_ct_net_table);
+	ve_ip_ct_net_table = NULL;
+out:
+	return ret;
+}
+#endif /*CONFIG_VE*/
 #endif /* CONFIG_SYSCTL */
 
 static int init_or_cleanup(int init)
@@ -773,92 +923,120 @@ static int init_or_cleanup(int init)
 
 	if (!init) goto cleanup;
 
+	ret = -ENOENT;
+	if (!ve_is_super(get_exec_env()))
+		__module_get(THIS_MODULE);
+
 	ret = ip_conntrack_init();
 	if (ret < 0)
-		goto cleanup_nothing;
+		goto cleanup_unget;
+
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		return 0;
 
 #ifdef CONFIG_PROC_FS
-	proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
+	ret = -ENOENT;
+	proc = __proc_net_fops_create("net/ip_conntrack", 0440, &ct_file_ops,
+								NULL);
 	if (!proc) goto cleanup_init;
 
-	proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
-					&exp_file_ops);
+	proc_exp = __proc_net_fops_create("net/ip_conntrack_expect", 0440,
+					&exp_file_ops, NULL);
 	if (!proc_exp) goto cleanup_proc;
 
-	proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
-	if (!proc_stat)
-		goto cleanup_proc_exp;
+	if (ve_is_super(get_exec_env())) {
+		proc_stat = create_proc_entry("net/stat/ip_conntrack", S_IRUGO,
+									NULL);
+		if (!proc_stat)
+			goto cleanup_proc_exp;
 
-	proc_stat->proc_fops = &ct_cpu_seq_fops;
-	proc_stat->owner = THIS_MODULE;
+		proc_stat->proc_fops = &ct_cpu_seq_fops;
+		proc_stat->owner = THIS_MODULE;
+	}
 #endif
 
-	ret = nf_register_hook(&ip_conntrack_defrag_ops);
+	ret = visible_nf_register_hook(&ip_conntrack_defrag_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register pre-routing defrag hook.\n");
 		goto cleanup_proc_stat;
 	}
-	ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
+	ret = visible_nf_register_hook(&ip_conntrack_defrag_local_out_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register local_out defrag hook.\n");
 		goto cleanup_defragops;
 	}
-	ret = nf_register_hook(&ip_conntrack_in_ops);
+	ret = visible_nf_register_hook(&ip_conntrack_in_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register pre-routing hook.\n");
 		goto cleanup_defraglocalops;
 	}
-	ret = nf_register_hook(&ip_conntrack_local_out_ops);
+	ret = visible_nf_register_hook(&ip_conntrack_local_out_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register local out hook.\n");
 		goto cleanup_inops;
 	}
-	ret = nf_register_hook(&ip_conntrack_out_ops);
+	ret = visible_nf_register_hook(&ip_conntrack_out_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register post-routing hook.\n");
 		goto cleanup_inandlocalops;
 	}
-	ret = nf_register_hook(&ip_conntrack_local_in_ops);
+	ret = visible_nf_register_hook(&ip_conntrack_local_in_ops);
 	if (ret < 0) {
 		printk("ip_conntrack: can't register local in hook.\n");
 		goto cleanup_inoutandlocalops;
 	}
 #ifdef CONFIG_SYSCTL
-	ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
-	if (ip_ct_sysctl_header == NULL) {
+#ifdef CONFIG_VE_IPTABLES
+	ret = ip_conntrack_sysctl_init();
+	if (ret < 0)
+		goto cleanup_sysctl;
+#endif
+	ret = -ENOMEM;
+	ve_ip_ct_sysctl_header = register_sysctl_table(ve_ip_ct_net_table, 0);
+	if (ve_ip_ct_sysctl_header == NULL) {
 		printk("ip_conntrack: can't register to sysctl.\n");
-		goto cleanup;
+		goto cleanup_sysctl2;
 	}
 #endif
 
-	return ret;
+	return 0;
 
  cleanup:
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		goto cleanup_init;
 #ifdef CONFIG_SYSCTL
- 	unregister_sysctl_table(ip_ct_sysctl_header);
+ 	unregister_sysctl_table(ve_ip_ct_sysctl_header);
+ cleanup_sysctl2:
+#ifdef CONFIG_VE_IPTABLES
+	ip_conntrack_sysctl_cleanup();
+ cleanup_sysctl:
+#endif
 #endif
-	nf_unregister_hook(&ip_conntrack_local_in_ops);
+	visible_nf_unregister_hook(&ip_conntrack_local_in_ops);
  cleanup_inoutandlocalops:
-	nf_unregister_hook(&ip_conntrack_out_ops);
+	visible_nf_unregister_hook(&ip_conntrack_out_ops);
  cleanup_inandlocalops:
-	nf_unregister_hook(&ip_conntrack_local_out_ops);
+	visible_nf_unregister_hook(&ip_conntrack_local_out_ops);
  cleanup_inops:
-	nf_unregister_hook(&ip_conntrack_in_ops);
+	visible_nf_unregister_hook(&ip_conntrack_in_ops);
  cleanup_defraglocalops:
-	nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
+	visible_nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
  cleanup_defragops:
-	nf_unregister_hook(&ip_conntrack_defrag_ops);
+	visible_nf_unregister_hook(&ip_conntrack_defrag_ops);
  cleanup_proc_stat:
 #ifdef CONFIG_PROC_FS
-	remove_proc_entry("ip_conntrack", proc_net_stat);
+	if (ve_is_super(get_exec_env()))
+		remove_proc_entry("net/stat/ip_conntrack", NULL);
  cleanup_proc_exp:
-	proc_net_remove("ip_conntrack_expect");
+	__proc_net_remove("net/ip_conntrack_expect");
  cleanup_proc:
-	proc_net_remove("ip_conntrack");
- cleanup_init:
+	__proc_net_remove("net/ip_conntrack");
 #endif /* CONFIG_PROC_FS */
+ cleanup_init:
 	ip_conntrack_cleanup();
- cleanup_nothing:
+ cleanup_unget:
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);
 	return ret;
 }
 
@@ -869,11 +1047,11 @@ int ip_conntrack_protocol_register(struc
 	int ret = 0;
 
 	WRITE_LOCK(&ip_conntrack_lock);
-	if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
+	if (ve_ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
 		ret = -EBUSY;
 		goto out;
 	}
-	ip_ct_protos[proto->proto] = proto;
+	ve_ip_ct_protos[proto->proto] = proto;
  out:
 	WRITE_UNLOCK(&ip_conntrack_lock);
 	return ret;
@@ -882,7 +1060,7 @@ int ip_conntrack_protocol_register(struc
 void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
 {
 	WRITE_LOCK(&ip_conntrack_lock);
-	ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
+	ve_ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
 	WRITE_UNLOCK(&ip_conntrack_lock);
 	
 	/* Somebody could be still looking at the proto in bh. */
@@ -892,17 +1070,39 @@ void ip_conntrack_protocol_unregister(st
 	ip_ct_selective_cleanup(kill_proto, &proto->proto);
 }
 
-static int __init init(void)
+int init_iptable_conntrack(void)
 {
 	return init_or_cleanup(1);
 }
 
-static void __exit fini(void)
+void fini_iptable_conntrack(void)
 {
 	init_or_cleanup(0);
 }
 
-module_init(init);
+static int __init init(void)
+{
+	int err;
+
+	err = init_iptable_conntrack();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_conntrack);
+	KSYMRESOLVE(fini_iptable_conntrack);
+	KSYMMODRESOLVE(ip_conntrack);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip_conntrack);
+	KSYMUNRESOLVE(init_iptable_conntrack);
+	KSYMUNRESOLVE(fini_iptable_conntrack);
+	fini_iptable_conntrack();
+}
+
+subsys_initcall(init);
 module_exit(fini);
 
 /* Some modules need us, but don't depend directly on any symbol.
@@ -911,18 +1111,23 @@ void need_ip_conntrack(void)
 {
 }
 
+EXPORT_SYMBOL(ip_conntrack_disable_ve0);
 EXPORT_SYMBOL(ip_conntrack_protocol_register);
 EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
 EXPORT_SYMBOL(ip_ct_get_tuplepr);
 EXPORT_SYMBOL(invert_tuplepr);
 EXPORT_SYMBOL(ip_conntrack_alter_reply);
+#ifndef CONFIG_VE_IPTABLES
 EXPORT_SYMBOL(ip_conntrack_destroyed);
+#endif
 EXPORT_SYMBOL(need_ip_conntrack);
 EXPORT_SYMBOL(ip_conntrack_helper_register);
 EXPORT_SYMBOL(ip_conntrack_helper_unregister);
 EXPORT_SYMBOL(ip_ct_selective_cleanup);
 EXPORT_SYMBOL(ip_ct_refresh_acct);
+#ifndef CONFIG_VE_IPTABLES
 EXPORT_SYMBOL(ip_ct_protos);
+#endif
 EXPORT_SYMBOL(ip_ct_find_proto);
 EXPORT_SYMBOL(ip_ct_find_helper);
 EXPORT_SYMBOL(ip_conntrack_expect_alloc);
@@ -934,9 +1139,11 @@ EXPORT_SYMBOL_GPL(ip_conntrack_expect_pu
 EXPORT_SYMBOL(ip_conntrack_tuple_taken);
 EXPORT_SYMBOL(ip_ct_gather_frags);
 EXPORT_SYMBOL(ip_conntrack_htable_size);
+#ifndef CONFIG_VE_IPTABLES
 EXPORT_SYMBOL(ip_conntrack_expect_list);
-EXPORT_SYMBOL(ip_conntrack_lock);
 EXPORT_SYMBOL(ip_conntrack_hash);
+#endif
+EXPORT_SYMBOL(ip_conntrack_lock);
 EXPORT_SYMBOL(ip_conntrack_untracked);
 EXPORT_SYMBOL_GPL(__ip_conntrack_find);
 EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_nat_core.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_nat_core.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_nat_core.c	2004-10-19 01:53:22.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_nat_core.c	2011-06-15 19:26:21.000000000 +0400
@@ -34,6 +34,8 @@
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/listhelp.h>
 
+#include <ub/ub_mem.h>
+
 #if 0
 #define DEBUGP printk
 #else
@@ -46,9 +48,20 @@ DECLARE_RWLOCK_EXTERN(ip_conntrack_lock)
 /* Calculated at init based on memory size */
 static unsigned int ip_nat_htable_size;
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_bysource \
+	(get_exec_env()->_ip_conntrack->_ip_nat_bysource)
+#define ve_byipsproto \
+	(get_exec_env()->_ip_conntrack->_ip_nat_bysource + ip_nat_htable_size)
+#else
 static struct list_head *bysource;
 static struct list_head *byipsproto;
+#define ve_bysource bysource
+#define ve_byipsproto byipsproto
+
 struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
+#endif
 
 
 /* We keep extra hashes for each conntrack, for fast searching. */
@@ -173,7 +186,7 @@ find_appropriate_src(const struct ip_con
 	struct ip_conntrack *ct;
 
 	MUST_BE_READ_LOCKED(&ip_nat_lock);
-	list_for_each_entry(ct, &bysource[h], nat.info.bysource)
+	list_for_each_entry(ct, &ve_bysource[h], nat.info.bysource)
 		if (src_cmp(ct, tuple, mr))
 			return &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src;
 	return NULL;
@@ -227,7 +240,7 @@ count_maps(u_int32_t src, u_int32_t dst,
 
 	MUST_BE_READ_LOCKED(&ip_nat_lock);
 	h = hash_by_ipsproto(src, dst, protonum);
-	list_for_each_entry(ct, &byipsproto[h], nat.info.byipsproto)
+	list_for_each_entry(ct, &ve_byipsproto[h], nat.info.byipsproto)
 		fake_cmp(ct, src, dst, protonum, &score, conntrack);
 
 	return score;
@@ -643,8 +656,8 @@ void replace_in_hashes(struct ip_conntra
 				   .tuple.dst.protonum);
 
 	MUST_BE_WRITE_LOCKED(&ip_nat_lock);
-	list_move(&info->bysource, &bysource[srchash]);
-	list_move(&info->byipsproto, &byipsproto[ipsprotohash]);
+	list_move(&info->bysource, &ve_bysource[srchash]);
+	list_move(&info->byipsproto, &ve_byipsproto[ipsprotohash]);
 }
 
 void place_in_hashes(struct ip_conntrack *conntrack,
@@ -666,8 +679,8 @@ void place_in_hashes(struct ip_conntrack
 				   .tuple.dst.protonum);
 
 	MUST_BE_WRITE_LOCKED(&ip_nat_lock);
-	list_add(&info->bysource, &bysource[srchash]);
-	list_add(&info->byipsproto, &byipsproto[ipsprotohash]);
+	list_add(&info->bysource, &ve_bysource[srchash]);
+	list_add(&info->byipsproto, &ve_byipsproto[ipsprotohash]);
 }
 
 /* Returns true if succeeded. */
@@ -937,43 +950,60 @@ icmp_reply_translation(struct sk_buff **
 	return 0;
 }
 
-int __init ip_nat_init(void)
+int ip_nat_init(void)
 {
+	int ret;
 	size_t i;
 
 	/* Leave them the same for the moment. */
-	ip_nat_htable_size = ip_conntrack_htable_size;
+	if (ve_is_super(get_exec_env()))
+		ip_nat_htable_size = ip_conntrack_htable_size;
 
 	/* One vmalloc for both hash tables */
-	bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size*2);
-	if (!bysource) {
-		return -ENOMEM;
-	}
-	byipsproto = bysource + ip_nat_htable_size;
+	ret = -ENOMEM;
+	ve_bysource = ub_vmalloc(sizeof(struct list_head) * ip_nat_htable_size*2);
+	if (!ve_bysource)
+		goto err;
+
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_protos = (struct ip_nat_protocol **)
+		ub_kmalloc(sizeof(void *)*MAX_IP_NAT_PROTO, GFP_KERNEL);
+	if (!ve_ip_nat_protos)
+		goto protos_fail;
+#endif
 
-	/* Sew in builtin protocols. */
 	WRITE_LOCK(&ip_nat_lock);
+	init_ip_nat_helpers();
 	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
-		ip_nat_protos[i] = &ip_nat_unknown_protocol;
-	ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
-	ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
-	ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
+		ve_ip_nat_protos[i] = &ip_nat_unknown_protocol;
+	/* Sew in builtin protocols. */
+	ve_ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
+	ve_ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
+	ve_ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
 	WRITE_UNLOCK(&ip_nat_lock);
 
+	if (ve_is_super(get_exec_env()))
+		/* Initialize fake conntrack so that NAT will skip it */
+		ip_conntrack_untracked.nat.info.initialized |= 
+			(1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST);
+
 	for (i = 0; i < ip_nat_htable_size; i++) {
-		INIT_LIST_HEAD(&bysource[i]);
-		INIT_LIST_HEAD(&byipsproto[i]);
+		INIT_LIST_HEAD(&ve_bysource[i]);
+		INIT_LIST_HEAD(&ve_byipsproto[i]);
 	}
 
 	/* FIXME: Man, this is a hack.  <SIGH> */
-	IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
-	ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
+	IP_NF_ASSERT(ve_ip_conntrack_destroyed == NULL);
+	ve_ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
 	
-	/* Initialize fake conntrack so that NAT will skip it */
-	ip_conntrack_untracked.nat.info.initialized |= 
-		(1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST);
-
 	return 0;
+
+#ifdef CONFIG_VE_IPTABLES
+protos_fail:
+	vfree(ve_bysource);
+#endif
+err:
+	return ret;
 }
 
 /* Clear NAT section of all conntracks, in case we're loaded again. */
@@ -987,6 +1017,11 @@ static int clean_nat(const struct ip_con
 void ip_nat_cleanup(void)
 {
 	ip_ct_selective_cleanup(&clean_nat, NULL);
-	ip_conntrack_destroyed = NULL;
-	vfree(bysource);
+	ve_ip_conntrack_destroyed = NULL;
+	vfree(ve_bysource);
+	ve_bysource = NULL;
+#ifdef CONFIG_VE_IPTABLES
+	kfree(ve_ip_nat_protos);
+	ve_ip_nat_protos = NULL;
+#endif
 }
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_nat_ftp.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_nat_ftp.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_nat_ftp.c	2004-10-19 01:54:38.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_nat_ftp.c	2011-06-15 19:26:19.000000000 +0400
@@ -19,6 +19,7 @@
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
@@ -280,17 +281,42 @@ static unsigned int help(struct ip_connt
 static struct ip_nat_helper ftp[MAX_PORTS];
 static char ftp_names[MAX_PORTS][10];
 
-/* Not __exit: called from init() */
-static void fini(void)
+void fini_iptable_nat_ftp(void)
 {
 	int i;
 
 	for (i = 0; i < ports_c; i++) {
 		DEBUGP("ip_nat_ftp: unregistering port %d\n", ports[i]);
-		ip_nat_helper_unregister(&ftp[i]);
+		visible_ip_nat_helper_unregister(&ftp[i]);
 	}
 }
 
+int init_iptable_nat_ftp(void)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < ports_c; i++) {
+		DEBUGP("ip_nat_ftp: Trying to register for port %d\n",
+				ports[i]);
+		ret = visible_ip_nat_helper_register(&ftp[i]);
+		if (ret) {
+			printk("ip_nat_ftp: error registering "
+					"helper for port %d\n", ports[i]);
+			fini_iptable_nat_ftp();
+			return ret;
+		}
+	}
+	return 0;
+}
+/* Not __exit: called from init() */
+static void fini(void)
+{
+	KSYMMODUNRESOLVE(ip_nat_ftp);
+	KSYMUNRESOLVE(init_iptable_nat_ftp);
+	KSYMUNRESOLVE(fini_iptable_nat_ftp);
+	fini_iptable_nat_ftp();
+}
+
 static int __init init(void)
 {
 	int i, ret = 0;
@@ -318,7 +344,7 @@ static int __init init(void)
 
 		DEBUGP("ip_nat_ftp: Trying to register for port %d\n",
 				ports[i]);
-		ret = ip_nat_helper_register(&ftp[i]);
+		ret = visible_ip_nat_helper_register(&ftp[i]);
 
 		if (ret) {
 			printk("ip_nat_ftp: error registering "
@@ -328,6 +354,9 @@ static int __init init(void)
 		}
 	}
 
+	KSYMRESOLVE(init_iptable_nat_ftp);
+	KSYMRESOLVE(fini_iptable_nat_ftp);
+	KSYMMODRESOLVE(ip_nat_ftp);
 	return ret;
 }
 
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_nat_helper.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_nat_helper.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_nat_helper.c	2004-10-19 01:53:22.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_nat_helper.c	2011-06-15 19:26:21.000000000 +0400
@@ -47,7 +47,13 @@
 #define DUMP_OFFSET(x)
 #endif
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_helpers (get_exec_env()->_ip_conntrack->_ip_nat_helpers)
+#else
 static LIST_HEAD(helpers);
+#define ve_helpers helpers
+#endif
 DECLARE_LOCK(ip_nat_seqofs_lock);
 
 /* Setup TCP sequence correction given this change at this sequence */
@@ -416,10 +422,10 @@ int ip_nat_helper_register(struct ip_nat
 	int ret = 0;
 
 	WRITE_LOCK(&ip_nat_lock);
-	if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple))
+	if (LIST_FIND(&ve_helpers, helper_cmp, struct ip_nat_helper *,&me->tuple))
 		ret = -EBUSY;
 	else
-		list_prepend(&helpers, me);
+		list_prepend(&ve_helpers, me);
 	WRITE_UNLOCK(&ip_nat_lock);
 
 	return ret;
@@ -428,7 +434,7 @@ int ip_nat_helper_register(struct ip_nat
 struct ip_nat_helper *
 __ip_nat_find_helper(const struct ip_conntrack_tuple *tuple)
 {
-	return LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, tuple);
+	return LIST_FIND(&ve_helpers, helper_cmp, struct ip_nat_helper *, tuple);
 }
 
 struct ip_nat_helper *
@@ -446,21 +452,15 @@ ip_nat_find_helper(const struct ip_connt
 static int
 kill_helper(const struct ip_conntrack *i, void *helper)
 {
-	int ret;
-
-	READ_LOCK(&ip_nat_lock);
-	ret = (i->nat.info.helper == helper);
-	READ_UNLOCK(&ip_nat_lock);
-
-	return ret;
+	return i->nat.info.helper == helper;
 }
 
 void ip_nat_helper_unregister(struct ip_nat_helper *me)
 {
 	WRITE_LOCK(&ip_nat_lock);
 	/* Autoloading conntrack helper might have failed */
-	if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple)) {
-		LIST_DELETE(&helpers, me);
+	if (LIST_FIND(&ve_helpers, helper_cmp, struct ip_nat_helper *,&me->tuple)) {
+		LIST_DELETE(&ve_helpers, me);
 	}
 	WRITE_UNLOCK(&ip_nat_lock);
 
@@ -476,3 +476,82 @@ void ip_nat_helper_unregister(struct ip_
 	   worse. --RR */
 	ip_ct_selective_cleanup(kill_helper, me);
 }
+
+int visible_ip_nat_helper_register(struct ip_nat_helper *me)
+{
+	int ret;
+	struct module *mod = me->me;
+
+	if (!ve_is_super(get_exec_env())) {
+		struct ip_nat_helper *tmp;
+		__module_get(mod);
+		ret = -ENOMEM;
+		tmp = kmalloc(sizeof(struct ip_nat_helper), GFP_KERNEL);
+		if (!tmp)
+			goto nomem;
+		memcpy(tmp, me, sizeof(struct ip_nat_helper));
+		me = tmp;
+	}
+
+	ret = ip_nat_helper_register(me);
+	if (ret)
+		goto out;
+
+	return 0;
+out:
+	if (!ve_is_super(get_exec_env())) {
+		kfree(me);
+nomem:
+		module_put(mod);
+	}
+	return ret;
+}
+
+void visible_ip_nat_helper_unregister(struct ip_nat_helper *me)
+{
+	struct ip_nat_helper *i;
+
+	READ_LOCK(&ip_nat_lock);
+	list_for_each_entry(i, &ve_helpers, list) {
+		if (i->name == me->name) {
+			me = i;
+			break;
+		}
+	}
+	READ_UNLOCK(&ip_nat_lock);
+	if (me != i)
+		return;
+
+	ip_nat_helper_unregister(me);
+
+	if (!ve_is_super(get_exec_env())) {
+		module_put(me->me);
+		kfree(me);
+	}
+}
+
+/* this function gives us an ability to safely restore
+ * connection in case of failure */
+int ip_nat_install_conntrack(struct ip_conntrack *conntrack, int helper)
+{
+	int ret = 0;
+
+	WRITE_LOCK(&ip_nat_lock);
+	if (helper) {
+		conntrack->nat.info.helper = LIST_FIND(&ve_helpers, helper_cmp,
+				struct ip_nat_helper *,
+				&conntrack->tuplehash[1].tuple);
+		if (conntrack->nat.info.helper == NULL)
+			ret = -EINVAL;
+	}
+	if (!ret)
+		place_in_hashes(conntrack, &conntrack->nat.info);
+	WRITE_UNLOCK(&ip_nat_lock);
+	return ret;
+}
+EXPORT_SYMBOL(ip_nat_install_conntrack);
+
+void init_ip_nat_helpers(void)
+{
+	INIT_LIST_HEAD(&ve_helpers);
+}
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_nat_irc.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_nat_irc.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_nat_irc.c	2004-10-19 01:55:35.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_nat_irc.c	2011-06-15 19:26:19.000000000 +0400
@@ -28,6 +28,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/moduleparam.h>
+#include <linux/nfcalls.h>
 
 #if 0
 #define DEBUGP printk
@@ -200,19 +201,49 @@ static unsigned int help(struct ip_connt
 static struct ip_nat_helper ip_nat_irc_helpers[MAX_PORTS];
 static char irc_names[MAX_PORTS][10];
 
-/* This function is intentionally _NOT_ defined as  __exit, because
- * it is needed by init() */
-static void fini(void)
+void fini_iptable_nat_irc(void)
 {
 	int i;
 
 	for (i = 0; i < ports_c; i++) {
 		DEBUGP("ip_nat_irc: unregistering helper for port %d\n",
 		       ports[i]);
-		ip_nat_helper_unregister(&ip_nat_irc_helpers[i]);
+		visible_ip_nat_helper_unregister(&ip_nat_irc_helpers[i]);
 	} 
 }
 
+int init_iptable_nat_irc(void)
+{
+	int ret = 0;
+	int i;
+	struct ip_nat_helper *hlpr;
+
+	for (i = 0; i < ports_c; i++) {
+		hlpr = &ip_nat_irc_helpers[i];
+		DEBUGP
+			("ip_nat_irc: Trying to register helper for port %d: name %s\n",
+			 ports[i], hlpr->name);
+		ret = visible_ip_nat_helper_register(hlpr);
+		if (ret) {
+			printk
+				("ip_nat_irc: error registering helper for port %d\n",
+				 ports[i]);
+			fini_iptable_nat_irc();
+			return 1;
+		}
+	}
+	return 0;
+}
+/* This function is intentionally _NOT_ defined as  __exit, because
+ * it is needed by init() */
+static void fini(void)
+{
+	KSYMMODUNRESOLVE(ip_nat_irc);
+	KSYMUNRESOLVE(init_iptable_nat_irc);
+	KSYMUNRESOLVE(fini_iptable_nat_irc);
+	fini_iptable_nat_irc();
+}
+
 static int __init init(void)
 {
 	int ret = 0;
@@ -244,7 +275,7 @@ static int __init init(void)
 		DEBUGP
 		    ("ip_nat_irc: Trying to register helper for port %d: name %s\n",
 		     ports[i], hlpr->name);
-		ret = ip_nat_helper_register(hlpr);
+		ret = visible_ip_nat_helper_register(hlpr);
 
 		if (ret) {
 			printk
@@ -254,6 +285,10 @@ static int __init init(void)
 			return 1;
 		}
 	}
+
+	KSYMRESOLVE(init_iptable_nat_irc);
+	KSYMRESOLVE(fini_iptable_nat_irc);
+	KSYMMODRESOLVE(ip_nat_irc);
 	return ret;
 }
 
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_nat_rule.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_nat_rule.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_nat_rule.c	2004-10-19 01:53:51.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_nat_rule.c	2011-06-15 19:26:22.000000000 +0400
@@ -27,6 +27,8 @@
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #include <linux/netfilter_ipv4/listhelp.h>
 
+#include <ub/ub_mem.h>
+
 #if 0
 #define DEBUGP printk
 #else
@@ -35,6 +37,14 @@
 
 #define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
 
+#ifdef CONFIG_VE_IPTABLES
+#define ve_nat_table (get_exec_env()->_ip_conntrack->_ip_nat_table)
+#define ve_initial_table (get_exec_env()->_ip_conntrack->_ip_nat_initial_table)
+#else
+#define ve_nat_table		&nat_table
+#define ve_initial_table	&nat_initial_table
+#endif
+
 /* Standard entry. */
 struct ipt_standard
 {
@@ -54,12 +64,12 @@ struct ipt_error
 	struct ipt_error_target target;
 };
 
-static struct
+static struct ipt_nat_initial_table
 {
 	struct ipt_replace repl;
 	struct ipt_standard entries[3];
 	struct ipt_error term;
-} nat_initial_table __initdata
+} nat_initial_table
 = { { "nat", NAT_VALID_HOOKS, 4,
       sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
       { [NF_IP_PRE_ROUTING] = 0,
@@ -242,6 +252,93 @@ static int ipt_dnat_checkentry(const cha
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *target, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_target *pt;
+	struct ip_nat_multi_range *pinfo;
+	struct compat_ip_nat_multi_range info;
+	u_int16_t tsize;
+
+	pt = (struct ipt_entry_target *)target;
+	tsize = pt->u.user.target_size;
+	if (__copy_to_user(*dstptr, pt, sizeof(struct ipt_entry_target)))
+		return -EFAULT;
+	pinfo = (struct ip_nat_multi_range *)pt->data;
+	memset(&info, 0, sizeof(struct compat_ip_nat_multi_range));
+	info.rangesize = pinfo->rangesize;
+	info.range[0].flags = pinfo->range[0].flags;
+	info.range[0].min_ip = pinfo->range[0].min_ip;
+	info.range[0].max_ip = pinfo->range[0].max_ip;
+	info.range[0].min = pinfo->range[0].min;
+	info.range[0].max = pinfo->range[0].max;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_target),
+				&info, sizeof(struct compat_ip_nat_multi_range)))
+		return -EFAULT;
+	tsize -= off;
+	if (put_user(tsize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += tsize;
+	return 0;
+}
+
+static int compat_from_user(void *target, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_target *pt;
+	struct ipt_entry_target *dstpt;
+	struct compat_ip_nat_multi_range *pinfo;
+	struct ip_nat_multi_range info;
+	u_int16_t tsize;
+
+	pt = (struct compat_ipt_entry_target *)target;
+	dstpt = (struct ipt_entry_target *)*dstptr;
+	tsize = pt->u.user.target_size;
+	memcpy(*dstptr, pt, sizeof(struct compat_ipt_entry_target));
+	pinfo = (struct compat_ip_nat_multi_range *)pt->data;
+	memset(&info, 0, sizeof(struct ip_nat_multi_range));
+	info.rangesize = pinfo->rangesize;
+	info.range[0].flags = pinfo->range[0].flags;
+	info.range[0].min_ip = pinfo->range[0].min_ip;
+	info.range[0].max_ip = pinfo->range[0].max_ip;
+	info.range[0].min = pinfo->range[0].min;
+	info.range[0].max = pinfo->range[0].max;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_target),
+				&info, sizeof(struct ip_nat_multi_range));
+	tsize += off;
+	dstpt->u.user.target_size = tsize;
+	*size += off;
+	*dstptr += tsize;
+	return 0;
+}
+
+static int compat(void *target, void **dstptr, int *size, int convert)
+{
+	int ret, off;
+
+	off = IPT_ALIGN(sizeof(struct ip_nat_multi_range)) -
+		COMPAT_IPT_ALIGN(sizeof(struct compat_ip_nat_multi_range));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = compat_to_user(target, dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = compat_from_user(target, dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 inline unsigned int
 alloc_null_binding(struct ip_conntrack *conntrack,
 		   struct ip_nat_info *info,
@@ -272,7 +369,7 @@ int ip_nat_rule_find(struct sk_buff **ps
 {
 	int ret;
 
-	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
+	ret = ipt_do_table(pskb, hooknum, in, out, ve_nat_table, NULL);
 
 	if (ret == NF_ACCEPT) {
 		if (!(info->initialized & (1 << HOOK2MANIP(hooknum))))
@@ -286,42 +383,89 @@ static struct ipt_target ipt_snat_reg = 
 	.name		= "SNAT",
 	.target		= ipt_snat_target,
 	.checkentry	= ipt_snat_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 };
 
 static struct ipt_target ipt_dnat_reg = {
 	.name		= "DNAT",
 	.target		= ipt_dnat_target,
 	.checkentry	= ipt_dnat_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 };
 
-int __init ip_nat_rule_init(void)
+int ip_nat_rule_init(void)
 {
 	int ret;
 
-	ret = ipt_register_table(&nat_table);
+#ifdef CONFIG_VE_IPTABLES
+	if (ve_is_super(get_exec_env())) {
+		ve_nat_table = &nat_table;
+		ve_initial_table = &nat_initial_table;
+	} else {
+		/* allocate structures in ve_struct */
+		ret = -ENOMEM;
+		ve_initial_table =
+			ub_kmalloc(sizeof(nat_initial_table), GFP_KERNEL);
+		if (!ve_initial_table)
+			goto nomem_initial;
+		ve_nat_table = ub_kmalloc(sizeof(nat_table), GFP_KERNEL);
+		if (!ve_nat_table)
+			goto nomem_table;
+
+		memcpy(ve_initial_table, &nat_initial_table,
+				sizeof(nat_initial_table));
+		memcpy(ve_nat_table, &nat_table,
+				sizeof(nat_table));
+		ve_nat_table->table = &ve_initial_table->repl;
+	}
+#endif
+	ret = ipt_register_table(ve_nat_table);
 	if (ret != 0)
-		return ret;
-	ret = ipt_register_target(&ipt_snat_reg);
+		goto out;
+	ret = visible_ipt_register_target(&ipt_snat_reg);
 	if (ret != 0)
 		goto unregister_table;
 
-	ret = ipt_register_target(&ipt_dnat_reg);
+	ret = visible_ipt_register_target(&ipt_dnat_reg);
 	if (ret != 0)
 		goto unregister_snat;
 
 	return ret;
 
- unregister_snat:
-	ipt_unregister_target(&ipt_snat_reg);
- unregister_table:
-	ipt_unregister_table(&nat_table);
-
+unregister_snat:
+	visible_ipt_unregister_target(&ipt_snat_reg);
+unregister_table:
+	ipt_unregister_table(ve_nat_table);
+out:
+#ifdef CONFIG_VE_IPTABLES
+	if (!ve_is_super(get_exec_env()))
+		kfree(ve_nat_table);
+	ve_nat_table = NULL;
+nomem_table:
+	if (!ve_is_super(get_exec_env()))
+		kfree(ve_initial_table);
+	ve_initial_table = NULL;
+nomem_initial:
+#endif
 	return ret;
 }
 
 void ip_nat_rule_cleanup(void)
 {
-	ipt_unregister_target(&ipt_dnat_reg);
-	ipt_unregister_target(&ipt_snat_reg);
-	ipt_unregister_table(&nat_table);
+	ipt_unregister_table(ve_nat_table);
+	visible_ipt_unregister_target(&ipt_dnat_reg);
+	visible_ipt_unregister_target(&ipt_snat_reg);
+
+#ifdef CONFIG_VE
+	if (!ve_is_super(get_exec_env())) {
+		kfree(ve_initial_table);
+		kfree(ve_nat_table);
+	}
+	ve_initial_table = NULL;
+	ve_nat_table = NULL;
+#endif
 }
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_nat_standalone.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_nat_standalone.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_nat_standalone.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_nat_standalone.c	2011-06-15 19:26:19.000000000 +0400
@@ -43,6 +43,8 @@
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/listhelp.h>
 
+#include <linux/nfcalls.h>
+
 #if 0
 #define DEBUGP printk
 #else
@@ -285,11 +287,11 @@ int ip_nat_protocol_register(struct ip_n
 	int ret = 0;
 
 	WRITE_LOCK(&ip_nat_lock);
-	if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
+	if (ve_ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
 		ret = -EBUSY;
 		goto out;
 	}
-	ip_nat_protos[proto->protonum] = proto;
+	ve_ip_nat_protos[proto->protonum] = proto;
  out:
 	WRITE_UNLOCK(&ip_nat_lock);
 	return ret;
@@ -299,7 +301,7 @@ int ip_nat_protocol_register(struct ip_n
 void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
 {
 	WRITE_LOCK(&ip_nat_lock);
-	ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
+	ve_ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
 	WRITE_UNLOCK(&ip_nat_lock);
 
 	/* Someone could be still looking at the proto in a bh. */
@@ -312,6 +314,9 @@ static int init_or_cleanup(int init)
 
 	need_ip_conntrack();
 
+	if (!ve_is_super(get_exec_env()))
+		__module_get(THIS_MODULE);
+
 	if (!init) goto cleanup;
 
 	ret = ip_nat_rule_init();
@@ -324,62 +329,95 @@ static int init_or_cleanup(int init)
 		printk("ip_nat_init: can't setup rules.\n");
 		goto cleanup_rule_init;
 	}
-	ret = nf_register_hook(&ip_nat_in_ops);
+
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		return 0;
+
+	ret = visible_nf_register_hook(&ip_nat_in_ops);
 	if (ret < 0) {
 		printk("ip_nat_init: can't register in hook.\n");
 		goto cleanup_nat;
 	}
-	ret = nf_register_hook(&ip_nat_out_ops);
+	ret = visible_nf_register_hook(&ip_nat_out_ops);
 	if (ret < 0) {
 		printk("ip_nat_init: can't register out hook.\n");
 		goto cleanup_inops;
 	}
 #ifdef CONFIG_IP_NF_NAT_LOCAL
-	ret = nf_register_hook(&ip_nat_local_out_ops);
+	ret = visible_nf_register_hook(&ip_nat_local_out_ops);
 	if (ret < 0) {
 		printk("ip_nat_init: can't register local out hook.\n");
 		goto cleanup_outops;
 	}
-	ret = nf_register_hook(&ip_nat_local_in_ops);
+	ret = visible_nf_register_hook(&ip_nat_local_in_ops);
 	if (ret < 0) {
 		printk("ip_nat_init: can't register local in hook.\n");
 		goto cleanup_localoutops;
 	}
 #endif
-	return ret;
+	return 0;
 
  cleanup:
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		goto cleanup_nat;
 #ifdef CONFIG_IP_NF_NAT_LOCAL
-	nf_unregister_hook(&ip_nat_local_in_ops);
+	visible_nf_unregister_hook(&ip_nat_local_in_ops);
  cleanup_localoutops:
-	nf_unregister_hook(&ip_nat_local_out_ops);
+	visible_nf_unregister_hook(&ip_nat_local_out_ops);
  cleanup_outops:
 #endif
-	nf_unregister_hook(&ip_nat_out_ops);
+	visible_nf_unregister_hook(&ip_nat_out_ops);
  cleanup_inops:
-	nf_unregister_hook(&ip_nat_in_ops);
+	visible_nf_unregister_hook(&ip_nat_in_ops);
  cleanup_nat:
 	ip_nat_cleanup();
  cleanup_rule_init:
 	ip_nat_rule_cleanup();
  cleanup_nothing:
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);
 	MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock);
 	return ret;
 }
 
-static int __init init(void)
+int init_iptable_nat(void)
 {
 	return init_or_cleanup(1);
 }
 
-static void __exit fini(void)
+void fini_iptable_nat(void)
 {
 	init_or_cleanup(0);
 }
 
-module_init(init);
+static int __init init(void)
+{
+	int err;
+
+	err = init_iptable_nat();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_nat);
+	KSYMRESOLVE(fini_iptable_nat);
+	KSYMMODRESOLVE(iptable_nat);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(iptable_nat);
+	KSYMUNRESOLVE(init_iptable_nat);
+	KSYMUNRESOLVE(fini_iptable_nat);
+	fini_iptable_nat();
+}
+
+fs_initcall(init);
 module_exit(fini);
 
+EXPORT_SYMBOL(visible_ip_nat_helper_register);
+EXPORT_SYMBOL(visible_ip_nat_helper_unregister);
+
 EXPORT_SYMBOL(ip_nat_setup_info);
 EXPORT_SYMBOL(ip_nat_protocol_register);
 EXPORT_SYMBOL(ip_nat_protocol_unregister);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_queue.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_queue.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_queue.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_queue.c	2011-06-15 19:26:19.000000000 +0400
@@ -542,7 +542,14 @@ ipq_rcv_sk(struct sock *sk, int len)
 			return;
 			
 		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+#ifdef CONFIG_VE
+			struct ve_struct *env;
+			env = set_exec_env(VE_OWNER_SKB(skb));
+#endif
 			ipq_rcv_skb(skb);
+#ifdef CONFIG_VE
+			(void)set_exec_env(env);
+#endif
 			kfree_skb(skb);
 		}
 		
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ip_tables.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_tables.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ip_tables.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ip_tables.c	2011-06-15 19:26:22.000000000 +0400
@@ -23,12 +23,20 @@
 #include <linux/udp.h>
 #include <linux/icmp.h>
 #include <net/ip.h>
+#include <net/compat.h>
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
 #include <linux/proc_fs.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 
+#include <ub/ub_mem.h>
+
+#ifdef CONFIG_USER_RESOURCE
+#include <ub/beancounter.h>
+#endif
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("IPv4 packet filter");
@@ -88,7 +96,7 @@ static DECLARE_MUTEX(ipt_mutex);
 struct ipt_table_info
 {
 	/* Size per table */
-	unsigned int size;
+	unsigned int size, alloc_size;
 	/* Number of entries: FIXME. --RR */
 	unsigned int number;
 	/* Initial number of entries. Needed for module usage count */
@@ -101,13 +109,59 @@ struct ipt_table_info
 	/* ipt_entry tables: one per CPU */
 	char entries[0] ____cacheline_aligned;
 #ifndef __GENKSYMS__
-	char *tblentries[NR_CPUS];
+	unsigned char *tblentries[NR_CPUS];
 #endif
 };
 
 static LIST_HEAD(ipt_target);
 static LIST_HEAD(ipt_match);
 static LIST_HEAD(ipt_tables);
+
+#ifdef CONFIG_VE_IPTABLES
+/* include ve.h and define get_exec_env */
+#include <linux/sched.h>
+
+int init_iptables(void);
+
+#define ve_ipt_target		(*(get_exec_env()->_ipt_target))
+#define ve_ipt_match		(*(get_exec_env()->_ipt_match))
+#define ve_ipt_tables		(*(get_exec_env()->_ipt_tables))
+#define ve_ipt_standard_target	(*(get_exec_env()->_ipt_standard_target))
+#define ve_ipt_error_target	(*(get_exec_env()->_ipt_error_target))
+#define ve_tcp_matchstruct	(*(get_exec_env()->_tcp_matchstruct))
+#define ve_udp_matchstruct	(*(get_exec_env()->_udp_matchstruct))
+#define ve_icmp_matchstruct	(*(get_exec_env()->_icmp_matchstruct))
+
+
+#ifdef CONFIG_USER_RESOURCE
+#define UB_NUMIPTENT 23
+static int charge_iptables(struct user_beancounter *ub, unsigned long size)
+{
+	if (ub == NULL)
+		return 0;
+	return charge_beancounter(ub, UB_NUMIPTENT, size, 1);
+}
+static void uncharge_iptables(struct user_beancounter *ub, unsigned long size)
+{
+	if (ub == NULL)
+		return;
+	uncharge_beancounter(ub, UB_NUMIPTENT, size);
+}
+#endif	/* CONFIG_USER_RESOURCE */
+
+#else	/* CONFIG_VE_IPTABLES */
+
+#define ve_ipt_target		ipt_target
+#define ve_ipt_match		ipt_match
+#define ve_ipt_tables		ipt_tables
+#define ve_ipt_standard_target	ipt_standard_target
+#define ve_ipt_error_target	ipt_error_target
+#define ve_tcp_matchstruct	tcp_matchstruct
+#define ve_udp_matchstruct	udp_matchstruct
+#define ve_icmp_matchstruct	icmp_matchstruct
+
+#endif	/* CONFIG_VE_IPTABLES */
+
 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
 
 #if 0
@@ -116,6 +170,8 @@ static LIST_HEAD(ipt_tables);
 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
 #endif
 
+#define ipt_table_info_ub(info)	(mem_ub(info))
+
 /* Returns whether matches rule or not. */
 static inline int
 ip_packet_match(const struct iphdr *ip,
@@ -303,7 +359,7 @@ ipt_do_table(struct sk_buff **pskb,
 	do {
 		IP_NF_ASSERT(e);
 		IP_NF_ASSERT(back);
-		(*pskb)->nfcache |= e->nfcache;
+		(*pskb)->nfcache |= e->nfcache & NFC_IPT_MASK;
 		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
 			struct ipt_entry_target *t;
 
@@ -410,9 +466,9 @@ find_inlist_lock_noload(struct list_head
 
 #if 0 
 	duprintf("find_inlist: searching for `%s' in %s.\n",
-		 name, head == &ipt_target ? "ipt_target"
-		 : head == &ipt_match ? "ipt_match"
-		 : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
+		 name, head == &ve_ipt_target ? "ipt_target"
+		 : head == &ve_ipt_match ? "ipt_match"
+		 : head == &ve_ipt_tables ? "ipt_tables" : "UNKNOWN");
 #endif
 
 	*error = down_interruptible(mutex);
@@ -453,19 +509,19 @@ find_inlist_lock(struct list_head *head,
 static inline struct ipt_table *
 ipt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
 {
-	return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
+	return find_inlist_lock(&ve_ipt_tables, name, "iptable_", error, mutex);
 }
 
 static inline struct ipt_match *
 find_match_lock(const char *name, int *error, struct semaphore *mutex)
 {
-	return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
+	return find_inlist_lock(&ve_ipt_match, name, "ipt_", error, mutex);
 }
 
 static struct ipt_target *
 ipt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
 {
-	return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
+	return find_inlist_lock(&ve_ipt_target, name, "ipt_", error, mutex);
 }
 
 /* All zeroes == unconditional rule. */
@@ -487,7 +543,7 @@ static int
 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
 {
 	unsigned int hook;
-	char *entry0 = newinfo->tblentries[0];
+	unsigned char *entry0 = newinfo->tblentries[0];
 
 	/* No recursion; use packet counter to save back ptrs (reset
 	   to 0 as we leave), and comefrom to save source hook bitmask */
@@ -507,7 +563,7 @@ mark_source_chains(struct ipt_table_info
 				= (void *)ipt_get_target(e);
 
 			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
-				printk("iptables: loop hook %u pos %u %08X.\n",
+				ve_printk(VE_LOG, "iptables: loop hook %u pos %u %08X.\n",
 				       hook, pos, e->comefrom);
 				return 0;
 			}
@@ -522,6 +578,13 @@ mark_source_chains(struct ipt_table_info
 			    && unconditional(&e->ip)) {
 				unsigned int oldpos, size;
 
+				if (t->verdict < -NF_MAX_VERDICT - 1) {
+					duprintf("mark_source_chains: bad "
+						"negative verdict (%i)\n",
+							t->verdict);
+					return 0;
+				}
+
 				/* Return: backtrack through the last
 				   big jump. */
 				do {
@@ -559,6 +622,14 @@ mark_source_chains(struct ipt_table_info
 				if (strcmp(t->target.u.user.name,
 					   IPT_STANDARD_TARGET) == 0
 				    && newpos >= 0) {
+					if (newpos > newinfo->size -
+						sizeof(struct ipt_entry)) {
+						duprintf("mark_source_chains: "
+							"bad verdict (%i)\n",
+								newpos);
+						return 0;
+					}
+
 					/* This a jump; chase it. */
 					duprintf("Jump rule %u -> %u\n",
 						 pos, newpos);
@@ -577,7 +648,6 @@ mark_source_chains(struct ipt_table_info
 	}
 	return 1;
 }
-
 static inline int
 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
 {
@@ -595,29 +665,15 @@ static inline int
 standard_check(const struct ipt_entry_target *t,
 	       unsigned int max_offset)
 {
-	struct ipt_standard_target *targ = (void *)t;
-
 	/* Check standard info. */
 	if (t->u.target_size
 	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
 		duprintf("standard_check: target size %u != %u\n",
-			 t->u.target_size,
+			 t->u.target_size, (unsigned int)
 			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
 		return 0;
 	}
 
-	if (targ->verdict >= 0
-	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
-		duprintf("ipt_standard_check: bad verdict (%i)\n",
-			 targ->verdict);
-		return 0;
-	}
-
-	if (targ->verdict < -NF_MAX_VERDICT - 1) {
-		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
-			 targ->verdict);
-		return 0;
-	}
 	return 1;
 }
 
@@ -625,6 +681,24 @@ static inline int
 check_match(struct ipt_entry_match *m,
 	    const char *name,
 	    const struct ipt_ip *ip,
+	    unsigned int hookmask)
+{
+	if (m->u.kernel.match->checkentry
+	    && !m->u.kernel.match->checkentry(name, ip, m->data,
+					      m->u.match_size - sizeof(*m),
+					      hookmask)) {
+		duprintf("check_match: check failed for `%s'.\n",
+			 m->u.kernel.match->name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static inline int
+find_check_match(struct ipt_entry_match *m,
+	    const char *name,
+	    const struct ipt_ip *ip,
 	    unsigned int hookmask,
 	    unsigned int *i)
 {
@@ -633,7 +707,7 @@ check_match(struct ipt_entry_match *m,
 
 	match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
 	if (!match) {
-		duprintf("check_match: `%s' not found\n", m->u.user.name);
+		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
 		return ret;
 	}
 	if (!try_module_get(match->me)) {
@@ -643,24 +717,57 @@ check_match(struct ipt_entry_match *m,
 	m->u.kernel.match = match;
 	up(&ipt_mutex);
 
-	if (m->u.kernel.match->checkentry
-	    && !m->u.kernel.match->checkentry(name, ip, m->data,
-					      m->u.match_size - sizeof(*m),
-					      hookmask)) {
-		module_put(m->u.kernel.match->me);
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 m->u.kernel.match->name);
+	(*i)++;
+	return check_match(m, name, ip, hookmask);
+}
+
+static struct ipt_target ipt_standard_target;
+
+static inline int
+check_target(struct ipt_entry *e, const char *name, unsigned int size)
+{
+	struct ipt_entry_target *t;
+
+	t = ipt_get_target(e);
+	if (t->u.kernel.target == &ve_ipt_standard_target) {
+		if (!standard_check(t, size))
+			return -EINVAL;
+	} else if (t->u.kernel.target->checkentry
+		   && !t->u.kernel.target->checkentry(name, e, t->data,
+						      t->u.target_size
+						      - sizeof(*t),
+						      e->comefrom)) {
+		duprintf("check_target: check failed for `%s'.\n",
+			 t->u.kernel.target->name);
 		return -EINVAL;
 	}
 
-	(*i)++;
 	return 0;
 }
 
-static struct ipt_target ipt_standard_target;
+static inline int
+check_entry(struct ipt_entry *e, const char *name)
+{
+	struct ipt_entry_target *t;
+
+	if (!ip_checkentry(&e->ip)) {
+		duprintf("check_entry: ip check failed %p %s.\n", e, name);
+		return -EINVAL;
+	}
+
+	if (e->target_offset + sizeof(struct ipt_entry_target) >
+		e->next_offset)
+		return -EINVAL;
+
+	t = ipt_get_target(e);
+	if (e->target_offset + t->u.target_size > e->next_offset)
+		return -EINVAL;
+
+	return 0;
+}
 
 static inline int
-check_entry(struct ipt_entry *e, const char *name, unsigned int size,
+find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 	    unsigned int *i)
 {
 	struct ipt_entry_target *t;
@@ -668,20 +775,22 @@ check_entry(struct ipt_entry *e, const c
 	int ret;
 	unsigned int j;
 
-	if (!ip_checkentry(&e->ip)) {
-		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
-		return -EINVAL;
-	}
+	ret = check_entry(e, name);
+	if (ret != 0)
+		return ret;
 
 	j = 0;
-	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
+	ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, e->comefrom, &j);
 	if (ret != 0)
 		goto cleanup_matches;
 
 	t = ipt_get_target(e);
+	ret = -EINVAL;
+	if (e->target_offset + t->u.target_size > e->next_offset)
+			goto cleanup_matches;
 	target = ipt_find_target_lock(t->u.user.name, &ret, &ipt_mutex);
 	if (!target) {
-		duprintf("check_entry: `%s' not found\n", t->u.user.name);
+		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
 		goto cleanup_matches;
 	}
 	if (!try_module_get(target->me)) {
@@ -692,26 +801,14 @@ check_entry(struct ipt_entry *e, const c
 	t->u.kernel.target = target;
 	up(&ipt_mutex);
 
-	if (t->u.kernel.target == &ipt_standard_target) {
-		if (!standard_check(t, size)) {
-			ret = -EINVAL;
-			goto cleanup_matches;
-		}
-	} else if (t->u.kernel.target->checkentry
-		   && !t->u.kernel.target->checkentry(name, e, t->data,
-						      t->u.target_size
-						      - sizeof(*t),
-						      e->comefrom)) {
-		module_put(t->u.kernel.target->me);
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 t->u.kernel.target->name);
-		ret = -EINVAL;
-		goto cleanup_matches;
-	}
-
+	ret = check_target(e, name, size);
+	if (ret)
+		goto put_target;
 	(*i)++;
 	return 0;
 
+ put_target:
+ 	module_put(target->me);
  cleanup_matches:
 	IPT_MATCH_ITERATE(e, cleanup_match, &j);
 	return ret;
@@ -791,7 +888,7 @@ translate_table(const char *name,
 {
 	unsigned int i;
 	int ret;
-	char *entry0 = newinfo->tblentries[0];
+	unsigned char *entry0 = newinfo->tblentries[0];
 
 	newinfo->size = size;
 	newinfo->number = number;
@@ -843,11 +940,10 @@ translate_table(const char *name,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
-				check_entry, name, size, &i);
-
+				find_check_entry, name, size, &i);
 	if (ret != 0) {
 		IPT_ENTRY_ITERATE(entry0, newinfo->size,
-				  cleanup_entry, &i);
+				cleanup_entry, &i);
 		return ret;
 	}
 
@@ -860,6 +956,66 @@ translate_table(const char *name,
 	return ret;
 }
 
+#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE)
+static int charge_replace_table(struct ipt_table_info *oldinfo,
+				struct ipt_table_info *newinfo)
+{
+	struct user_beancounter *old_ub, *new_ub;
+	int old_number, new_number;
+
+	old_ub = ipt_table_info_ub(oldinfo);
+	new_ub = ipt_table_info_ub(newinfo);
+	old_number = oldinfo->number;
+	new_number = newinfo->number;
+
+	/* XXX: I don't understand the code below and am not sure that it does
+	 * something reasonable.  2002/04/26  SAW */
+	if (old_ub == new_ub) {
+		int charge;
+		/* charge only differences in entries */
+		charge = new_number - old_number;
+	 	if (charge > 0) {
+			if (charge_iptables(old_ub, charge))
+				return -1;
+		} else
+			uncharge_iptables(old_ub, -charge);
+	} else {
+		/* different contexts; do charge current and uncharge old */
+		if (charge_iptables(new_ub, new_number))
+			return -1;
+		uncharge_iptables(old_ub, old_number);
+	}
+	return 0;
+}
+#endif
+
+static int setup_table(struct ipt_table *table, struct ipt_table_info *info)
+{
+#ifdef CONFIG_NETFILTER_DEBUG
+	{
+		struct ipt_entry *table_base;
+		unsigned int i;
+
+		for_each_cpu(i) {
+			table_base = (struct ipt_entry *)info->tblentries[i];
+			table_base->comefrom = 0xdead57ac;
+		}
+	}
+#endif
+#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE)
+	{
+		struct user_beancounter *ub;
+
+		ub = ipt_table_info_ub(info);
+		if (charge_iptables(ub, info->number))
+			return -ENOMEM;
+	}
+#endif
+	table->private = info;
+	info->initial_entries = 0;
+	return 0;
+}
+
 static struct ipt_table_info *
 replace_table(struct ipt_table *table,
 	      unsigned int num_counters,
@@ -892,6 +1048,16 @@ replace_table(struct ipt_table *table,
 		return NULL;
 	}
 	oldinfo = table->private;
+
+#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE)
+	if (charge_replace_table(oldinfo, newinfo)) {
+		oldinfo = NULL;
+		write_unlock_bh(&table->lock);
+		*error = -ENOMEM;
+		return NULL;
+ 	}
+#endif
+
 	table->private = newinfo;
 	newinfo->initial_entries = oldinfo->initial_entries;
 	write_unlock_bh(&table->lock);
@@ -928,24 +1094,19 @@ get_counters(const struct ipt_table_info
 	}
 }
 
-static int
-copy_entries_to_user(unsigned int total_size,
-		     struct ipt_table *table,
-		     void __user *userptr)
+static inline struct ipt_counters * alloc_counters(struct ipt_table *table)
 {
-	unsigned int off, num, countersize;
-	struct ipt_entry *e;
 	struct ipt_counters *counters;
-	int ret = 0;
+	unsigned int countersize;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct ipt_counters) * table->private->number;
-	counters = vmalloc(countersize);
+	counters = vmalloc_best(countersize);
 
 	if (counters == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* First, sum counters... */
 	memset(counters, 0, countersize);
@@ -953,6 +1114,23 @@ copy_entries_to_user(unsigned int total_
 	get_counters(table->private, counters);
 	write_unlock_bh(&table->lock);
 
+	return counters;
+}
+
+static int
+copy_entries_to_user(unsigned int total_size,
+		     struct ipt_table *table,
+		     void __user *userptr)
+{
+	unsigned int off, num;
+	struct ipt_entry *e;
+	struct ipt_counters *counters;
+	int ret = 0;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
 	/* ... then copy entire thing from CPU 0... */
 	if (copy_to_user(userptr, table->private->tblentries[0],
 	    total_size) != 0) {
@@ -1008,134 +1186,492 @@ copy_entries_to_user(unsigned int total_
 	return ret;
 }
 
-static int
-get_entries(const struct ipt_get_entries *entries,
-	    struct ipt_get_entries __user *uptr)
-{
-	int ret;
-	struct ipt_table *t;
+#ifdef CONFIG_COMPAT
+static DECLARE_MUTEX(compat_ipt_mutex);
 
-	t = ipt_find_table_lock(entries->name, &ret, &ipt_mutex);
-	if (t) {
-		duprintf("t->private->number = %u\n",
-			 t->private->number);
-		if (entries->size == t->private->size)
-			ret = copy_entries_to_user(t->private->size,
-						   t, uptr->entrytable);
-		else {
-			duprintf("get_entries: I've got %u not %u!\n",
-				 t->private->size,
-				 entries->size);
-			ret = -EINVAL;
-		}
-		up(&ipt_mutex);
-	} else
-		duprintf("get_entries: Can't find %s!\n",
-			 entries->name);
+struct compat_delta {
+	struct compat_delta *next;
+	unsigned int offset;
+	short delta;
+};
 
-	return ret;
-}
+static struct compat_delta *compat_offsets = NULL;
 
-void ipt_free_table_info(struct ipt_table_info *info)
+static int compat_add_offset(unsigned int offset, short delta)
 {
-	int cpu;
-	for_each_cpu(cpu) {
-		if (info->size <= PAGE_SIZE)
-			kfree(info->tblentries[cpu]);
-		else
-			vfree(info->tblentries[cpu]);
+	struct compat_delta *tmp;
+
+	tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+	tmp->offset = offset;
+	tmp->delta = delta;
+	if (compat_offsets) {
+		tmp->next = compat_offsets->next;
+		compat_offsets->next = tmp;
+	} else {
+		compat_offsets = tmp;
+		tmp->next = NULL;
 	}
-	kfree(info);
+	return 0;
 }
 
-struct ipt_table_info *ipt_alloc_table_info(unsigned int size)
+static void compat_flush_offsets(void)
 {
-	struct ipt_table_info *newinfo;
-	unsigned int cpu;
-
-	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
-	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
-		return NULL;
-
-	/* kzalloc the main struct */
-	newinfo = kzalloc(sizeof(struct ipt_table_info), GFP_KERNEL);
-	if (!newinfo)
-		return NULL;
-
-	newinfo->size = size;
-
-	/* allocate the table for each CPU */
-	for_each_cpu(cpu) {
-		if (size <= PAGE_SIZE)
-			newinfo->tblentries[cpu] = kmalloc(size, GFP_KERNEL);
-		else
-			newinfo->tblentries[cpu] = vmalloc(size);
+	struct compat_delta *tmp, *next;
 
-		if (newinfo->tblentries[cpu] == NULL) {
-			ipt_free_table_info(newinfo);
-			return NULL;
+	if (compat_offsets) {
+		for(tmp = compat_offsets; tmp; tmp = next) {
+			next = tmp->next;
+			kfree(tmp);
 		}
+		compat_offsets = NULL;
 	}
+}
 
-	return newinfo;
+static short compat_calc_jump(unsigned int offset)
+{
+	struct compat_delta *tmp;
+	short delta;
+
+	for(tmp = compat_offsets, delta = 0; tmp; tmp = tmp->next)
+		if (tmp->offset < offset)
+			delta += tmp->delta;
+	return delta;
 }
 
-static int
-do_replace(void __user *user, unsigned int len)
+struct compat_ipt_standard_target
 {
-	int ret;
-	struct ipt_replace tmp;
-	struct ipt_table *t;
-	struct ipt_table_info *newinfo, *oldinfo;
-	struct ipt_counters *counters;
+	struct compat_ipt_entry_target target;
+	compat_int_t verdict;
+};
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
-		return -EFAULT;
+#define IPT_ST_OFFSET	(sizeof(struct ipt_standard_target) - \
+				sizeof(struct compat_ipt_standard_target))
 
-	/* Hack: Causes ipchains to give correct error msg --RR */
-	if (len != sizeof(tmp) + tmp.size)
-		return -ENOPROTOOPT;
+struct ipt_standard
+{
+	struct ipt_entry entry;
+	struct ipt_standard_target target;
+};
 
-	/* overflow check */
-	if (tmp.size >= (INT_MAX - sizeof(struct ipt_table_info)) / NR_CPUS -
-			SMP_CACHE_BYTES)
-		return -ENOMEM;
-	if (tmp.num_counters >= INT_MAX / sizeof(struct ipt_counters))
-		return -ENOMEM;
+struct compat_ipt_standard
+{
+	struct compat_ipt_entry entry;
+	struct compat_ipt_standard_target target;
+};
 
-	newinfo = ipt_alloc_table_info(tmp.size);
-	if (!newinfo)
-		return -ENOMEM;
+static int compat_ipt_standard_fn(void *target,
+		void **dstptr, int *size, int convert)
+{
+	struct compat_ipt_standard_target compat_st, *pcompat_st;
+	struct ipt_standard_target st, *pst;
+	int ret;
 
-	if (copy_from_user(newinfo->tblentries[0], user + sizeof(tmp),
-			   tmp.size) != 0) {
-		ret = -EFAULT;
-		goto free_newinfo;
+	ret = 0;
+	switch (convert) {
+		case COMPAT_TO_USER:
+			pst = (struct ipt_standard_target *)target;
+			memcpy(&compat_st.target, &pst->target,
+					sizeof(struct ipt_entry_target));
+			compat_st.verdict = pst->verdict;
+			if (compat_st.verdict > 0)
+				compat_st.verdict -=
+					compat_calc_jump(compat_st.verdict);
+			compat_st.target.u.user.target_size =
+			sizeof(struct compat_ipt_standard_target);
+			if (__copy_to_user(*dstptr, &compat_st,
+				sizeof(struct compat_ipt_standard_target)))
+				ret = -EFAULT;
+			*size -= IPT_ST_OFFSET;
+			*dstptr += sizeof(struct compat_ipt_standard_target);
+			break;
+		case COMPAT_FROM_USER:
+			pcompat_st =
+				(struct compat_ipt_standard_target *)target;
+			memcpy(&st.target, &pcompat_st->target,
+					sizeof(struct ipt_entry_target));
+			st.verdict = pcompat_st->verdict;
+			if (st.verdict > 0)
+				st.verdict += compat_calc_jump(st.verdict);
+			st.target.u.user.target_size =
+			sizeof(struct ipt_standard_target);
+			memcpy(*dstptr, &st,
+					sizeof(struct ipt_standard_target));
+			*size += IPT_ST_OFFSET;
+			*dstptr += sizeof(struct ipt_standard_target);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += IPT_ST_OFFSET;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
 	}
+	return ret;
+}
 
-	counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
-	if (!counters) {
-		ret = -ENOMEM;
-		goto free_newinfo;
-	}
-	memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
+int ipt_target_align_compat(void *target, void **dstptr,
+		int *size, int off, int convert)
+{
+	struct compat_ipt_entry_target *pcompat;
+	struct ipt_entry_target *pt;
+	u_int16_t tsize;
+	int ret;
 
-	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, tmp.size, tmp.num_entries,
-			      tmp.hook_entry, tmp.underflow);
-	if (ret != 0)
-		goto free_newinfo_counters;
+	ret = 0;
+	switch (convert) {
+		case COMPAT_TO_USER:
+			pt = (struct ipt_entry_target *)target;
+			tsize = pt->u.user.target_size;
+			if (__copy_to_user(*dstptr, pt, tsize)) {
+				ret = -EFAULT;
+				break;
+			}
+			tsize -= off;
+			if (put_user(tsize, (u_int16_t *)*dstptr))
+				ret = -EFAULT;
+			*size -= off;
+			*dstptr += tsize;
+			break;
+		case COMPAT_FROM_USER:
+			pcompat = (struct compat_ipt_entry_target *)target;
+			pt = (struct ipt_entry_target *)*dstptr;
+			tsize = pcompat->u.user.target_size;
+			memcpy(pt, pcompat, tsize);
+			tsize += off;
+			pt->u.user.target_size = tsize;
+			*size += off;
+			*dstptr += tsize;
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
 
-	duprintf("ip_tables: Translated table\n");
+int ipt_match_align_compat(void *match, void **dstptr,
+		int *size, int off, int convert)
+{
+	struct compat_ipt_entry_match *pcompat_m;
+	struct ipt_entry_match *pm;
+	u_int16_t msize;
+	int ret;
+
+	ret = 0;
+	switch (convert) {
+		case COMPAT_TO_USER:
+			pm = (struct ipt_entry_match *)match;
+			msize = pm->u.user.match_size;
+			if (__copy_to_user(*dstptr, pm, msize)) {
+				ret = -EFAULT;
+				break;
+			}
+			msize -= off;
+			if (put_user(msize, (u_int16_t *)*dstptr))
+				ret = -EFAULT;
+			*size -= off;
+			*dstptr += msize;
+			break;
+		case COMPAT_FROM_USER:
+			pcompat_m = (struct compat_ipt_entry_match *)match;
+			pm = (struct ipt_entry_match *)*dstptr;
+			msize = pcompat_m->u.user.match_size;
+			memcpy(pm, pcompat_m, msize);
+			msize += off;
+			pm->u.user.match_size = msize;
+			*size += off;
+			*dstptr += msize;
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+
+static int tcp_compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_tcp)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_tcp));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+
+static int udp_compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_udp)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_udp));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+
+static int icmp_compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_icmp)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_icmp));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+
+static inline int
+compat_calc_match(struct ipt_entry_match *m, int * size)
+{
+	if (m->u.kernel.match->compat)
+		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
+	return 0;
+}
+
+static int compat_calc_entry(struct ipt_entry *e,
+		struct ipt_table_info *info, struct ipt_table_info *newinfo)
+{
+	struct ipt_entry_target *t;
+	unsigned int entry_offset;
+	int off, i, ret;
+
+	off = 0;
+	entry_offset = (void *)e - (void *)info->tblentries[0];
+	IPT_MATCH_ITERATE(e, compat_calc_match, &off);
+	t = ipt_get_target(e);
+	if (t->u.kernel.target->compat)
+		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
+	newinfo->size -= off;
+	ret = compat_add_offset(entry_offset, off);
+	if (ret)
+		return ret;
+
+	for (i = 0; i< NF_IP_NUMHOOKS; i++) {
+		if (info->hook_entry[i] && (e < (struct ipt_entry *)
+				(info->tblentries[0] + info->hook_entry[i])))
+			newinfo->hook_entry[i] -= off;
+		if (info->underflow[i] && (e < (struct ipt_entry *)
+				(info->tblentries[0] + info->underflow[i])))
+			newinfo->underflow[i] -= off;
+	}
+	return 0;
+}
+
+static int compat_table_info(struct ipt_table_info *info,
+		struct ipt_table_info *newinfo)
+{
+	int i;
+
+	if (!newinfo || !info)
+		return -EINVAL;
+
+	memset(newinfo, 0, sizeof(struct ipt_table_info));
+	newinfo->size = info->size;
+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+		newinfo->hook_entry[i] = info->hook_entry[i];
+		newinfo->underflow[i] = info->underflow[i];
+	}
+	return IPT_ENTRY_ITERATE(info->tblentries[0],
+			info->size, compat_calc_entry, info, newinfo);
+}
+#endif
+
+static int get_info(void __user *user, int *len)
+{
+	char name[IPT_TABLE_MAXNAMELEN];
+	struct ipt_table *t;
+	int ret, size;
+
+#ifdef CONFIG_COMPAT
+	if (is_current_32bits())
+		size = sizeof(struct compat_ipt_getinfo);
+	else
+#endif
+		size = sizeof(struct ipt_getinfo);
+
+	if (*len != size) {
+		duprintf("length %u != %u\n", *len,
+			(unsigned int)sizeof(struct ipt_getinfo));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(name, user, sizeof(name)) != 0)
+		return -EFAULT;
+
+	name[IPT_TABLE_MAXNAMELEN-1] = '\0';
+#ifdef CONFIG_COMPAT
+	down(&compat_ipt_mutex);
+#endif
+	t = ipt_find_table_lock(name, &ret, &ipt_mutex);
+	if (t) {
+		struct ipt_getinfo info;
+#ifdef CONFIG_COMPAT
+		struct compat_ipt_getinfo compat_info;
+#endif
+		void *pinfo;
+
+#ifdef CONFIG_COMPAT
+		if (is_current_32bits()) {
+			struct ipt_table_info t_info;
+			ret = compat_table_info(t->private, &t_info);
+			compat_flush_offsets();
+			memcpy(compat_info.hook_entry, t_info.hook_entry,
+					sizeof(compat_info.hook_entry));
+			memcpy(compat_info.underflow, t_info.underflow,
+					sizeof(compat_info.underflow));
+			compat_info.valid_hooks = t->valid_hooks;
+			compat_info.num_entries = t->private->number;
+			compat_info.size = t_info.size;
+			strcpy(compat_info.name, name);
+			pinfo = (void *)&compat_info;
+		} else
+#endif
+		{
+			info.valid_hooks = t->valid_hooks;
+			memcpy(info.hook_entry, t->private->hook_entry,
+					sizeof(info.hook_entry));
+			memcpy(info.underflow, t->private->underflow,
+					sizeof(info.underflow));
+			info.num_entries = t->private->number;
+			info.size = t->private->size;
+			strcpy(info.name, name);
+			pinfo = (void *)&info;
+		}
+
+		if (copy_to_user(user, pinfo, *len) != 0)
+			ret = -EFAULT;
+		else
+			ret = 0;
+
+		up(&ipt_mutex);
+	}
+#ifdef CONFIG_COMPAT
+	up(&compat_ipt_mutex);
+#endif
+	return ret;
+}
+
+static int
+get_entries(struct ipt_get_entries __user *uptr, int *len)
+{
+	int ret;
+	struct ipt_get_entries get;
+	struct ipt_table *t;
+
+	if (*len < sizeof(get)) {
+		duprintf("get_entries: %u < %d\n", *len,
+				(unsigned int)sizeof(get));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+
+	if (*len != sizeof(struct ipt_get_entries) + get.size) {
+		duprintf("get_entries: %u != %u\n", *len,
+				(unsigned int)(sizeof(struct ipt_get_entries) +
+				get.size));
+		return -EINVAL;
+	}
+
+	t = ipt_find_table_lock(get.name, &ret, &ipt_mutex);
+	if (t) {
+		duprintf("t->private->number = %u\n",
+			 t->private->number);
+		if (get.size == t->private->size)
+			ret = copy_entries_to_user(t->private->size,
+						   t, uptr->entrytable);
+		else {
+			duprintf("get_entries: I've got %u not %u!\n",
+				 t->private->size,
+				 get.size);
+			ret = -EINVAL;
+		}
+		up(&ipt_mutex);
+	} else
+		duprintf("get_entries: Can't find %s!\n",
+			 get.name);
+
+	return ret;
+}
+
+void ipt_free_table_info(struct ipt_table_info *info)
+{
+	int cpu;
+	for_each_cpu(cpu) {
+		if (info->alloc_size <= PAGE_SIZE)
+			kfree(info->tblentries[cpu]);
+		else
+			vfree(info->tblentries[cpu]);
+	}
+	kfree(info);
+}
+
+struct ipt_table_info *ipt_alloc_table_info(unsigned int size)
+{
+	struct ipt_table_info *newinfo;
+	unsigned int cpu;
+
+	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
+	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
+		return NULL;
+
+	/* kzalloc the main struct */
+	newinfo = kzalloc(sizeof(struct ipt_table_info), GFP_KERNEL_UBC);
+	if (!newinfo)
+		return NULL;
+
+	newinfo->size = newinfo->alloc_size = size;
 
-	t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
+	/* allocate the table for each CPU */
+	for_each_cpu(cpu) {
+		if (size <= PAGE_SIZE)
+			newinfo->tblentries[cpu] = ub_kmalloc(size, GFP_KERNEL);
+		else
+			newinfo->tblentries[cpu] = ub_vmalloc(size);
+
+		if (newinfo->tblentries[cpu] == NULL) {
+			ipt_free_table_info(newinfo);
+			return NULL;
+		}
+	}
+
+	return newinfo;
+}
+
+static int
+__do_replace(const char *name, unsigned int valid_hooks,
+		struct ipt_table_info *newinfo, unsigned int size,
+		unsigned int num_counters, void __user *counters_ptr)
+{
+	int ret;
+	struct ipt_table *t;
+	struct ipt_table_info *oldinfo;
+	struct ipt_counters *counters;
+
+	counters = ub_vmalloc_best(num_counters *
+					sizeof(struct ipt_counters));
+	if (!counters) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	memset(counters, 0, num_counters * sizeof(struct ipt_counters));
+
+	t = ipt_find_table_lock(name, &ret, &ipt_mutex);
 	if (!t)
 		goto free_newinfo_counters_untrans;
 
 	/* You lied! */
-	if (tmp.valid_hooks != t->valid_hooks) {
+	if (valid_hooks != t->valid_hooks) {
 		duprintf("Valid hook crap: %08X vs %08X\n",
-			 tmp.valid_hooks, t->valid_hooks);
+			 valid_hooks, t->valid_hooks);
 		ret = -EINVAL;
 		goto free_newinfo_counters_untrans_unlock;
 	}
@@ -1146,127 +1682,767 @@ do_replace(void __user *user, unsigned i
 		goto free_newinfo_counters_untrans_unlock;
 	}
 
+	oldinfo = replace_table(t, num_counters, newinfo, &ret);
+	if (!oldinfo)
+		goto put_module;
+
+	/* Update module usage count based on number of rules */
+	duprintf("__do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+		oldinfo->number, oldinfo->initial_entries, newinfo->number);
+	if ((oldinfo->number > oldinfo->initial_entries) || 
+	    (newinfo->number <= oldinfo->initial_entries)) 
+		module_put(t->me);
+	if ((oldinfo->number > oldinfo->initial_entries) &&
+	    (newinfo->number <= oldinfo->initial_entries))
+		module_put(t->me);
+
+
+	/* Get the old counters. */
+	get_counters(oldinfo, counters);
+	/* Decrease module usage counts and free resource */
+	IPT_ENTRY_ITERATE(oldinfo->tblentries[0], oldinfo->size,
+				cleanup_entry,NULL);
+	ipt_free_table_info(oldinfo);
+	/* Silent error: too late now. */
+	copy_to_user(counters_ptr, counters,
+		     sizeof(struct ipt_counters) * num_counters);
+	vfree(counters);
+	up(&ipt_mutex);
+	return 0;
+ put_module:
+	module_put(t->me);
+ free_newinfo_counters_untrans_unlock:
+	up(&ipt_mutex);
+ free_newinfo_counters_untrans:
+	vfree(counters);
+ out:
+	return ret;
+}
+
+static int
+do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct ipt_replace tmp;
+	struct ipt_table_info *newinfo;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* Hack: Causes ipchains to give correct error msg --RR */
+	if (len != sizeof(tmp) + tmp.size)
+		return -ENOPROTOOPT;
+
+	/* overflow check */
+	if (tmp.size >= (INT_MAX - sizeof(struct ipt_table_info)) / NR_CPUS -
+			SMP_CACHE_BYTES)
+		return -ENOMEM;
+	if (tmp.num_counters >= INT_MAX / sizeof(struct ipt_counters))
+		return -ENOMEM;
+
+	newinfo = ipt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	if (copy_from_user(newinfo->tblentries[0], user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("ip_tables: Translated table\n");
+
+	ret = __do_replace(tmp.name, tmp.valid_hooks,
+			      newinfo, tmp.size, tmp.num_counters,
+			      tmp.counters);
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	IPT_ENTRY_ITERATE(newinfo->tblentries[0], newinfo->size,
+				cleanup_entry,NULL);
+ free_newinfo:
+	ipt_free_table_info(newinfo);
+	return ret;
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static inline int
+add_counter_to_entry(struct ipt_entry *e,
+		     const struct ipt_counters addme[],
+		     unsigned int *i)
+{
+#if 0
+	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
+		 *i,
+		 (long unsigned int)e->counters.pcnt,
+		 (long unsigned int)e->counters.bcnt,
+		 (long unsigned int)addme[*i].pcnt,
+		 (long unsigned int)addme[*i].bcnt);
+#endif
+
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+static int
+do_add_counters(void __user *user, unsigned int len)
+{
+	unsigned int i;
+	struct ipt_counters_info tmp;
+	void *ptmp;
+	struct ipt_table *t;
+	unsigned int num_counters;
+	char *name;
+	struct ipt_counters *paddc;
+	int ret, size;
+#ifdef CONFIG_COMPAT
+	struct compat_ipt_counters_info compat_tmp;
+
+	if (is_current_32bits()) {
+		ptmp = &compat_tmp;
+		size = sizeof(struct compat_ipt_counters_info);
+	} else
+#endif
+	{
+		ptmp = &tmp;
+		size = sizeof(struct ipt_counters_info);
+	}
+
+	if (copy_from_user(ptmp, user, size) != 0)
+		return -EFAULT;
+
+#ifdef CONFIG_COMPAT
+	if (is_current_32bits()) {
+		num_counters = compat_tmp.num_counters;
+		name = compat_tmp.name;
+	} else
+#endif
+	{
+		num_counters = tmp.num_counters;
+		name = tmp.name;
+	}
+
+	if (len != size + num_counters * sizeof(struct ipt_counters))
+		return -EINVAL;
+
+	paddc = ub_vmalloc_best(len - size);
+	if (!paddc)
+		return -ENOMEM;
+
+	if (copy_from_user(paddc, user + size, len - size) != 0) {
+		ret = -EFAULT;
+		goto free;
+	}
+
+	t = ipt_find_table_lock(name, &ret, &ipt_mutex);
+	if (!t)
+		goto free;
+
+	write_lock_bh(&t->lock);
+	if (t->private->number != num_counters) {
+		ret = -EINVAL;
+		goto unlock_up_free;
+	}
+
+	i = 0;
+	IPT_ENTRY_ITERATE(t->private->tblentries[0],
+			  t->private->size,
+			  add_counter_to_entry,
+			  paddc,
+			  &i);
+ unlock_up_free:
+	write_unlock_bh(&t->lock);
+	up(&ipt_mutex);
+ free:
+	vfree(paddc);
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+struct compat_ipt_replace {
+	char			name[IPT_TABLE_MAXNAMELEN];
+	u32			valid_hooks;
+	u32			num_entries;
+	u32			size;
+	u32			hook_entry[NF_IP_NUMHOOKS];
+	u32			underflow[NF_IP_NUMHOOKS];
+	u32			num_counters;
+	compat_uptr_t		counters;	/* struct ipt_counters * */
+	struct compat_ipt_entry	entries[0];
+};
+
+static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
+		void __user **dstptr, compat_uint_t *size)
+{
+	if (m->u.kernel.match->compat)
+		m->u.kernel.match->compat(m, dstptr, (int *)size,
+							COMPAT_TO_USER);
+	else {
+		if (__copy_to_user(*dstptr, m, m->u.match_size))
+			return -EFAULT;
+		*dstptr += m->u.match_size;
+	}
+	return 0;
+}
+
+static int compat_copy_entry_to_user(struct ipt_entry *e,
+		void __user **dstptr, compat_uint_t *size)
+{
+	struct ipt_entry_target __user *t;
+	struct compat_ipt_entry __user *ce;
+	u_int16_t target_offset, next_offset;
+	compat_uint_t origsize;
+	int ret;
+
+	ret = -EFAULT;
+	origsize = *size;
+	ce = (struct compat_ipt_entry __user *)*dstptr;
+	if (__copy_to_user(ce, e, sizeof(struct ipt_entry)))
+		goto out;
+
+	*dstptr += sizeof(struct compat_ipt_entry);
+	ret = IPT_MATCH_ITERATE(e, compat_copy_match_to_user, dstptr, size);
+	target_offset = e->target_offset - (origsize - *size);
+	if (ret)
+		goto out;
+	t = ipt_get_target(e);
+	if (t->u.kernel.target->compat) {
+		ret = t->u.kernel.target->compat(t,
+				dstptr, (int *)size, COMPAT_TO_USER);
+		if (ret)
+			goto out;
+	} else {
+		ret = -EFAULT;
+		if (__copy_to_user(*dstptr, t, t->u.target_size))
+			goto out;
+		*dstptr += t->u.target_size;
+	}
+	ret = -EFAULT;
+	next_offset = e->next_offset - (origsize - *size);
+	if (__put_user(target_offset, &ce->target_offset))
+		goto out;
+	if (__put_user(next_offset, &ce->next_offset))
+		goto out;
+	return 0;
+out:
+	return ret;
+}
+
+static inline int
+compat_check_calc_match(struct ipt_entry_match *m,
+	    const char *name,
+	    const struct ipt_ip *ip,
+	    unsigned int hookmask,
+	    int *size, unsigned int *i)
+{
+	int ret;
+	struct ipt_match *match;
+
+	match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
+	if (!match) {
+		duprintf("compat_check_calc_match: `%s' not found\n",
+							m->u.user.name);
+		return ret;
+	}
+	if (!try_module_get(match->me)) {
+		up(&ipt_mutex);
+		return -ENOENT;
+	}
+	m->u.kernel.match = match;
+	up(&ipt_mutex);
+
+	if (m->u.kernel.match->compat)
+		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
+
+	(*i)++;
+	return 0;
+}
+
+static inline int
+check_compat_entry_size_and_hooks(struct ipt_entry *e,
+			   struct ipt_table_info *newinfo,
+			   unsigned char *base,
+			   unsigned char *limit,
+			   unsigned int *hook_entries,
+			   unsigned int *underflows,
+			   unsigned int *i,
+			   const char *name)
+{
+	struct ipt_entry_target *t;
+	struct ipt_target *target;
+	unsigned int entry_offset;
+	int ret, off, h;
+	unsigned int j;
+
+	duprintf("check_compat_entry_size_and_hooks %p\n", e);
+	if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
+	    || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
+		duprintf("Bad offset %p, limit = %p\n", e, limit);
+		return -EINVAL;
+	}
+
+	if (e->next_offset < sizeof(struct compat_ipt_entry) +
+			sizeof(struct compat_ipt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	ret = check_entry(e, name);
+	if (ret != 0)
+		return ret;
+
+	off = 0;
+	entry_offset = (void *)e - (void *)base;
+	j = 0;
+	ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
+			e->comefrom, &off, &j);
+	if (ret != 0)
+		goto out;
+
+	t = ipt_get_target(e);
+	target = ipt_find_target_lock(t->u.user.name, &ret, &ipt_mutex);
+	if (!target) {
+		duprintf("check_compat_entry_size_and_hooks: `%s'"
+					" not found\n", t->u.user.name);
+		goto out;
+	}
+	if (!try_module_get(target->me)) {
+		up(&ipt_mutex);
+		ret = -ENOENT;
+		goto out;
+	}
+	t->u.kernel.target = target;
+	up(&ipt_mutex);
+
+	if (t->u.kernel.target->compat)
+		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
+	newinfo->size += off;
+	ret = compat_add_offset(entry_offset, off);
+	if (ret)
+		goto out_put;
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* Clear counters and comefrom */
+	e->counters = ((struct ipt_counters) { 0, 0 });
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+out_put:
+	module_put(target->me);
+out:
+	IPT_MATCH_ITERATE(e, cleanup_match, &j);
+	return ret;
+}
+
+static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
+	void **dstptr, compat_uint_t *size, const char *name,
+	const struct ipt_ip *ip, unsigned int hookmask)
+{
+	struct ipt_entry_match *dm;
+
+	dm = (struct ipt_entry_match *)*dstptr;
+	if (m->u.kernel.match->compat)
+		m->u.kernel.match->compat(m, dstptr, (int *)size,
+							COMPAT_FROM_USER);
+	else {
+		memcpy(*dstptr, m, m->u.match_size);
+		*dstptr += m->u.match_size;
+	}
+
+	return 0;
+}
+
+static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
+	unsigned int *size, const char *name,
+	struct ipt_table_info *newinfo, unsigned char *base)
+{
+	struct ipt_entry_target *t;
+	struct ipt_entry *de;
+	unsigned int origsize;
+	int ret, h;
+
+	origsize = *size;
+	de = (struct ipt_entry *)*dstptr;
+	memcpy(de, e, sizeof(struct ipt_entry));
+
+	*dstptr += sizeof(struct compat_ipt_entry);
+	ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
+			name, &de->ip, de->comefrom);
+	if (ret)
+		return ret;
+	de->target_offset = e->target_offset - (origsize - *size);
+	t = ipt_get_target(e);
+	if (t->u.kernel.target->compat)
+		t->u.kernel.target->compat(t,
+				dstptr, (int *)size, COMPAT_FROM_USER);
+	else {
+		memcpy(*dstptr, t, t->u.target_size);
+		*dstptr += t->u.target_size;
+	}
+
+	de->next_offset = e->next_offset - (origsize - *size);
+	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+		if ((unsigned char *)de - base < newinfo->hook_entry[h])
+			newinfo->hook_entry[h] -= origsize - *size;
+		if ((unsigned char *)de - base < newinfo->underflow[h])
+			newinfo->underflow[h] -= origsize - *size;
+	}
+
+	return 0;
+}
+
+static inline int compat_check_entry(struct ipt_entry *e,
+					const char *name, unsigned int size)
+{
+	int ret;
+
+	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom);
+	if (ret != 0)
+		return ret;
+
+	return check_target(e, name, size);
+}
+
+static int
+translate_compat_table(const char *name,
+		unsigned int valid_hooks,
+		struct ipt_table_info **pinfo,
+		unsigned int total_size,
+		unsigned int number,
+		unsigned int *hook_entries,
+		unsigned int *underflows)
+{
+	unsigned int i, j;
+	struct ipt_table_info *newinfo, *info;
+	void *pos, *entry;
+	unsigned int size;
+	int ret;
+
+	info = *pinfo;
+	info->size = total_size;
+	info->number = number;
+
+	/* Init all hooks to impossible value. */
+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+		info->hook_entry[i] = 0xFFFFFFFF;
+		info->underflow[i] = 0xFFFFFFFF;
+	}
+
+	duprintf("translate_compat_table: size %u\n", info->size);
+	i = 0;
+	down(&compat_ipt_mutex);
+	/* Walk through entries, checking offsets. */
+	ret = IPT_ENTRY_ITERATE(info->tblentries[0], total_size,
+				check_compat_entry_size_and_hooks,
+				info, info->tblentries[0],
+				info->tblentries[0] + total_size,
+				hook_entries, underflows, &i, name);
+	if (ret != 0)
+		goto out_unlock;
+
+	ret = -EINVAL;
+	if (i != number) {
+		duprintf("translate_compat_table: %u not %u entries\n",
+			 i, number);
+		goto out_unlock;
+	}
+
+	/* Check hooks all assigned */
+	for (j = 0; j < NF_IP_NUMHOOKS; j++) {
+		/* Only hooks which are valid */
+		if (!(valid_hooks & (1 << j)))
+			continue;
+		if (info->hook_entry[j] == 0xFFFFFFFF) {
+			duprintf("Invalid hook entry %u %u\n",
+				 j, hook_entries[j]);
+			goto out_unlock;
+		}
+		if (info->underflow[j] == 0xFFFFFFFF) {
+			duprintf("Invalid underflow %u %u\n",
+				 j, underflows[j]);
+			goto out_unlock;
+		}
+	}
+
+	ret = -ENOMEM;
+	newinfo = ipt_alloc_table_info(info->size);
+	if (!newinfo)
+		goto out_unlock;
+
+	newinfo->number = info->number;
+	for (j = 0; j < NF_IP_NUMHOOKS; j++) {
+		newinfo->hook_entry[j] = info->hook_entry[j];
+		newinfo->underflow[j] = info->underflow[j];
+	}
+
+	pos = entry = newinfo->tblentries[0];
+	size =  total_size;
+	ret = IPT_ENTRY_ITERATE(info->tblentries[0], total_size,
+			compat_copy_entry_from_user, &pos, &size,
+			name, newinfo, entry);
+	compat_flush_offsets();
+	up(&compat_ipt_mutex);
+	if (ret)
+		goto free_newinfo;
+
+	ret = -ELOOP;
+	if (!mark_source_chains(newinfo, valid_hooks))
+		goto free_newinfo;
+
+	ret = IPT_ENTRY_ITERATE(entry, newinfo->size,
+				compat_check_entry, name, size);
+	if (ret)
+		goto free_newinfo;
+
+	/* And one copy for every other CPU */
+	for_each_cpu(i) {
+		if (newinfo->tblentries[i] && newinfo->tblentries[i] != entry)
+			memcpy(newinfo->tblentries[i], entry, newinfo->size);
+	}
+
+	*pinfo = newinfo;
+	ipt_free_table_info(info);
+	return 0;
+
+free_newinfo:
+	IPT_ENTRY_ITERATE(info->tblentries[0], total_size,
+			cleanup_entry, &i);
+	ipt_free_table_info(newinfo);
+out:
+	return ret;
+out_unlock:
+	IPT_ENTRY_ITERATE(info->tblentries[0], total_size,
+			cleanup_entry, &i);
+	compat_flush_offsets();
+	up(&compat_ipt_mutex);
+	goto out;
+}
+
+static int
+compat_do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct compat_ipt_replace tmp;
+	struct ipt_table_info *newinfo;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* Hack: Causes ipchains to give correct error msg --RR */
+	if (len != sizeof(tmp) + tmp.size)
+		return -ENOPROTOOPT;
+
+	/* overflow check */
+	if (tmp.num_counters >= INT_MAX / sizeof(struct ipt_counters))
+		return -ENOMEM;
+
+	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
+	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
+		return -ENOMEM;
 
-	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
-	if (!oldinfo)
-		goto put_module;
+	newinfo = ipt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
 
-	/* Update module usage count based on number of rules */
-	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
-		oldinfo->number, oldinfo->initial_entries, newinfo->number);
-	if ((oldinfo->number > oldinfo->initial_entries) || 
-	    (newinfo->number <= oldinfo->initial_entries)) 
-		module_put(t->me);
-	if ((oldinfo->number > oldinfo->initial_entries) &&
-	    (newinfo->number <= oldinfo->initial_entries))
-		module_put(t->me);
+	if (copy_from_user(newinfo->tblentries[0], user + sizeof(tmp), tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
 
+	ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+			      &newinfo, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
 
-	/* Get the old counters. */
-	get_counters(oldinfo, counters);
-	/* Decrease module usage counts and free resource */
-	IPT_ENTRY_ITERATE(oldinfo->tblentries[0], oldinfo->size,
-				cleanup_entry,NULL);
-	ipt_free_table_info(oldinfo);
-	/* Silent error: too late now. */
-	copy_to_user(tmp.counters, counters,
-		     sizeof(struct ipt_counters) * tmp.num_counters);
-	vfree(counters);
-	up(&ipt_mutex);
+	duprintf("compat_do_replace: Translated table\n");
+
+	ret = __do_replace(tmp.name, tmp.valid_hooks,
+			      newinfo, tmp.size, tmp.num_counters,
+			      compat_ptr(tmp.counters));
+	if (ret)
+		goto free_newinfo_untrans;
 	return 0;
 
- put_module:
-	module_put(t->me);
- free_newinfo_counters_untrans_unlock:
-	up(&ipt_mutex);
- free_newinfo_counters_untrans:
-	IPT_ENTRY_ITERATE(newinfo->tblentries[0], newinfo->size,
-				cleanup_entry,NULL);
- free_newinfo_counters:
-	vfree(counters);
+ free_newinfo_untrans:
+	IPT_ENTRY_ITERATE(newinfo->tblentries[0], newinfo->size, cleanup_entry,NULL);
  free_newinfo:
 	ipt_free_table_info(newinfo);
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static inline int
-add_counter_to_entry(struct ipt_entry *e,
-		     const struct ipt_counters addme[],
-		     unsigned int *i)
+struct compat_ipt_get_entries
 {
-#if 0
-	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
-		 *i,
-		 (long unsigned int)e->counters.pcnt,
-		 (long unsigned int)e->counters.bcnt,
-		 (long unsigned int)addme[*i].pcnt,
-		 (long unsigned int)addme[*i].bcnt);
-#endif
+	char name[IPT_TABLE_MAXNAMELEN];
+	compat_uint_t size;
+	struct compat_ipt_entry entrytable[0];
+};
 
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+static int compat_copy_entries_to_user(unsigned int total_size,
+		     struct ipt_table *table, void __user *userptr)
+{
+	unsigned int off, num;
+	struct compat_ipt_entry e;
+	struct ipt_counters *counters;
+	void __user *pos;
+	unsigned int size;
+	int ret = 0;
 
-	(*i)++;
-	return 0;
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
+	/* ... then copy entire thing from CPU 0... */
+	pos = userptr;
+	size = total_size;
+	ret = IPT_ENTRY_ITERATE(table->private->tblentries[0],
+			total_size, compat_copy_entry_to_user, &pos, &size);
+
+	/* ... then go back and fix counters and names */
+	for (off = 0, num = 0; off < size; off += e.next_offset, num++) {
+		unsigned int i;
+		struct ipt_entry_match m;
+		struct ipt_entry_target t;
+
+		ret = -EFAULT;
+		if (copy_from_user(&e, userptr + off,
+					sizeof(struct compat_ipt_entry)))
+			goto free_counters;
+		if (copy_to_user(userptr + off +
+			offsetof(struct compat_ipt_entry, counters),
+			 &counters[num], sizeof(counters[num])))
+			goto free_counters;
+
+		for (i = sizeof(struct compat_ipt_entry);
+				i < e.target_offset; i += m.u.match_size) {
+			if (copy_from_user(&m, userptr + off + i,
+					sizeof(struct ipt_entry_match)))
+				goto free_counters;
+			if (copy_to_user(userptr + off + i +
+				offsetof(struct ipt_entry_match, u.user.name),
+				m.u.kernel.match->name,
+				strlen(m.u.kernel.match->name) + 1))
+				goto free_counters;
+		}
+
+		if (copy_from_user(&t, userptr + off + e.target_offset,
+					sizeof(struct ipt_entry_target)))
+			goto free_counters;
+		if (copy_to_user(userptr + off + e.target_offset +
+			offsetof(struct ipt_entry_target, u.user.name),
+			t.u.kernel.target->name,
+			strlen(t.u.kernel.target->name) + 1))
+			goto free_counters;
+	}
+	ret = 0;
+free_counters:
+	vfree(counters);
+	return ret;
 }
 
 static int
-do_add_counters(void __user *user, unsigned int len)
+compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
 {
-	unsigned int i;
-	struct ipt_counters_info tmp, *paddc;
-	struct ipt_table *t;
 	int ret;
+	struct compat_ipt_get_entries get;
+	struct ipt_table *t;
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
-		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
+	if (*len < sizeof(get)) {
+		duprintf("compat_get_entries: %u < %u\n",
+				*len, (unsigned int)sizeof(get));
 		return -EINVAL;
+	}
 
-	paddc = vmalloc(len);
-	if (!paddc)
-		return -ENOMEM;
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
 
-	if (copy_from_user(paddc, user, len) != 0) {
-		ret = -EFAULT;
-		goto free;
+	if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
+		duprintf("compat_get_entries: %u != %u\n", *len,
+			(unsigned int)(sizeof(struct compat_ipt_get_entries) +
+			get.size));
+		return -EINVAL;
 	}
 
-	t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
-	if (!t)
-		goto free;
+	down(&compat_ipt_mutex);
+	t = ipt_find_table_lock(get.name, &ret, &ipt_mutex);
+	if (t) {
+		struct ipt_table_info info;
+		duprintf("t->private->number = %u\n",
+			 t->private->number);
+		ret = compat_table_info(t->private, &info);
+		if (!ret && get.size == info.size) {
+			ret = compat_copy_entries_to_user(t->private->size,
+						   t, uptr->entrytable);
+		} else if (!ret) {
+			duprintf("compat_get_entries: I've got %u not %u!\n",
+				 t->private->size,
+				 get.size);
+			ret = -EINVAL;
+		}
+		compat_flush_offsets();
+		up(&ipt_mutex);
+	} else
+		duprintf("compat_get_entries: Can't find %s!\n",
+			 get.name);
+	up(&compat_ipt_mutex);
+	return ret;
+}
 
-	write_lock_bh(&t->lock);
-	if (t->private->number != tmp.num_counters) {
+static int
+compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+
+	switch (cmd) {
+	case IPT_SO_GET_INFO:
+		ret = get_info(user, len);
+		break;
+	case IPT_SO_GET_ENTRIES:
+		ret = compat_get_entries(user, len);
+		break;
+	default:
+		duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd);
 		ret = -EINVAL;
-		goto unlock_up_free;
 	}
-
-	i = 0;
-	IPT_ENTRY_ITERATE(t->private->tblentries[0],
-			  t->private->size,
-			  add_counter_to_entry,
-			  paddc->counters,
-			  &i);
- unlock_up_free:
-	write_unlock_bh(&t->lock);
-	up(&ipt_mutex);
- free:
-	vfree(paddc);
-
 	return ret;
 }
+#endif
 
 static int
 do_ipt_set_ctl(struct sock *sk,	int cmd, void __user *user, unsigned int len)
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
+#ifdef CONFIG_COMPAT
+	if (is_current_32bits() && (cmd == IPT_SO_SET_REPLACE))
+		return compat_do_replace(user, len);
+#endif
+
 	switch (cmd) {
 	case IPT_SO_SET_REPLACE:
 		ret = do_replace(user, len);
@@ -1289,65 +2465,22 @@ do_ipt_get_ctl(struct sock *sk, int cmd,
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
-	switch (cmd) {
-	case IPT_SO_GET_INFO: {
-		char name[IPT_TABLE_MAXNAMELEN];
-		struct ipt_table *t;
-
-		if (*len != sizeof(struct ipt_getinfo)) {
-			duprintf("length %u != %u\n", *len,
-				 sizeof(struct ipt_getinfo));
-			ret = -EINVAL;
-			break;
-		}
-
-		if (copy_from_user(name, user, sizeof(name)) != 0) {
-			ret = -EFAULT;
-			break;
-		}
-		name[IPT_TABLE_MAXNAMELEN-1] = '\0';
-		t = ipt_find_table_lock(name, &ret, &ipt_mutex);
-		if (t) {
-			struct ipt_getinfo info;
-
-			info.valid_hooks = t->valid_hooks;
-			memcpy(info.hook_entry, t->private->hook_entry,
-			       sizeof(info.hook_entry));
-			memcpy(info.underflow, t->private->underflow,
-			       sizeof(info.underflow));
-			info.num_entries = t->private->number;
-			info.size = t->private->size;
-			strcpy(info.name, name);
-
-			if (copy_to_user(user, &info, *len) != 0)
-				ret = -EFAULT;
-			else
-				ret = 0;
-
-			up(&ipt_mutex);
-		}
-	}
-	break;
+#ifdef CONFIG_COMPAT
+	if (is_current_32bits())
+		return compat_do_ipt_get_ctl(sk, cmd, user, len);
+#endif
 
-	case IPT_SO_GET_ENTRIES: {
-		struct ipt_get_entries get;
+	switch (cmd) {
+	case IPT_SO_GET_INFO:
+		ret = get_info(user, len);
+		break;
 
-		if (*len < sizeof(get)) {
-			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
-			ret = -EINVAL;
-		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
-			ret = -EFAULT;
-		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
-			duprintf("get_entries: %u != %u\n", *len,
-				 sizeof(struct ipt_get_entries) + get.size);
-			ret = -EINVAL;
-		} else
-			ret = get_entries(&get, user);
+	case IPT_SO_GET_ENTRIES:
+		ret = get_entries(user, len);
 		break;
-	}
 
 	default:
 		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
@@ -1367,7 +2500,7 @@ ipt_register_target(struct ipt_target *t
 	if (ret != 0)
 		return ret;
 
-	if (!list_named_insert(&ipt_target, target)) {
+	if (!list_named_insert(&ve_ipt_target, target)) {
 		duprintf("ipt_register_target: `%s' already in list!\n",
 			 target->name);
 		ret = -EINVAL;
@@ -1376,12 +2509,60 @@ ipt_register_target(struct ipt_target *t
 	return ret;
 }
 
+int
+visible_ipt_register_target(struct ipt_target *target)
+{
+	int ret;
+	struct module *mod = target->me;
+
+	if (!ve_is_super(get_exec_env())) {
+		struct ipt_target *tmp;
+		__module_get(mod);
+		ret = -ENOMEM;
+		tmp = kmalloc(sizeof(struct ipt_target), GFP_KERNEL);
+		if (!tmp)
+			goto nomem;
+		memcpy(tmp, target, sizeof(struct ipt_target));
+		target = tmp;
+	}
+
+	ret = ipt_register_target(target);
+	if (ret)
+		goto out;
+
+	return 0;
+out:
+	if (!ve_is_super(get_exec_env())) {
+		kfree(target);
+nomem:
+		module_put(mod);
+	}
+	return ret;
+}
+
 void
 ipt_unregister_target(struct ipt_target *target)
 {
 	down(&ipt_mutex);
-	LIST_DELETE(&ipt_target, target);
+	LIST_DELETE(&ve_ipt_target, target);
+	up(&ipt_mutex);
+}
+
+void
+visible_ipt_unregister_target(struct ipt_target *target)
+{
+	down(&ipt_mutex);
+	target = list_named_find(&ve_ipt_target, target->name);
 	up(&ipt_mutex);
+	if (!target)
+		return;
+
+	ipt_unregister_target(target);
+
+	if (!ve_is_super(get_exec_env())) {
+		module_put(target->me);
+		kfree(target);
+	}
 }
 
 int
@@ -1393,13 +2574,43 @@ ipt_register_match(struct ipt_match *mat
 	if (ret != 0)
 		return ret;
 
-	if (!list_named_insert(&ipt_match, match)) {
+	if (!list_named_insert(&ve_ipt_match, match)) {
 		duprintf("ipt_register_match: `%s' already in list!\n",
 			 match->name);
 		ret = -EINVAL;
 	}
 	up(&ipt_mutex);
+	return ret;
+}
+
+int
+visible_ipt_register_match(struct ipt_match *match)
+{
+	int ret;
+	struct module *mod = match->me;
+
+	if (!ve_is_super(get_exec_env())) {
+		struct ipt_match *tmp;
+		__module_get(mod);
+		ret = -ENOMEM;
+		tmp = kmalloc(sizeof(struct ipt_match), GFP_KERNEL);
+		if (!tmp)
+			goto nomem;
+		memcpy(tmp, match, sizeof(struct ipt_match));
+		match = tmp;
+	}
+
+	ret = ipt_register_match(match);
+	if (ret)
+		goto out;
 
+	return 0;
+out:
+	if (!ve_is_super(get_exec_env())) {
+		kfree(match);
+nomem:
+		module_put(mod);
+	}
 	return ret;
 }
 
@@ -1407,7 +2618,38 @@ void
 ipt_unregister_match(struct ipt_match *match)
 {
 	down(&ipt_mutex);
-	LIST_DELETE(&ipt_match, match);
+	LIST_DELETE(&ve_ipt_match, match);
+	up(&ipt_mutex);
+}
+
+void
+visible_ipt_unregister_match(struct ipt_match *match)
+{
+	down(&ipt_mutex);
+	match = list_named_find(&ve_ipt_match, match->name);
+	up(&ipt_mutex);
+	if (!match)
+		return;
+
+	ipt_unregister_match(match);
+
+	if (!ve_is_super(get_exec_env())) {
+		module_put(match->me);
+		kfree(match);
+	}
+}
+
+void ipt_flush_table(struct ipt_table *table)
+{
+	if (table == NULL)
+		return;
+
+	down(&ipt_mutex);
+	IPT_ENTRY_ITERATE(table->private->tblentries[smp_processor_id()],
+			table->private->size, cleanup_entry, NULL);
+	if (table->private->number > table->private->initial_entries)
+		module_put(table->me);
+	table->private->size = 0;
 	up(&ipt_mutex);
 }
 
@@ -1415,12 +2657,11 @@ int ipt_register_table(struct ipt_table 
 {
 	int ret;
 	struct ipt_table_info *newinfo;
-	static struct ipt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
 
+	ret = -ENOMEM;
 	newinfo = ipt_alloc_table_info(table->table->size);
 	if (!newinfo)
-		return -ENOMEM;
+		goto out;
 
 	memcpy(newinfo->tblentries[0], table->table->entries, table->table->size);
 
@@ -1429,52 +2670,54 @@ int ipt_register_table(struct ipt_table 
 			      table->table->num_entries,
 			      table->table->hook_entry,
 			      table->table->underflow);
-	if (ret != 0) {
-		ipt_free_table_info(newinfo);
-		return ret;
-	}
+	if (ret != 0)
+		goto out_free;
 
 	ret = down_interruptible(&ipt_mutex);
-	if (ret != 0) {
-		ipt_free_table_info(newinfo);
-		return ret;
-	}
+	if (ret != 0)
+		goto out_free;
 
 	/* Don't autoload: we'd eat our tail... */
-	if (list_named_find(&ipt_tables, table->name)) {
-		ret = -EEXIST;
-		goto free_unlock;
-	}
+	ret = -EEXIST;
+	if (list_named_find(&ve_ipt_tables, table->name))
+		goto out_free_unlock;
 
-	/* Simplifies replace_table code. */
-	table->private = &bootstrap;
-	if (!replace_table(table, 0, newinfo, &ret))
-		goto free_unlock;
+	table->lock = RW_LOCK_UNLOCKED;
+	ret = setup_table(table, newinfo);
+	if (ret)
+		goto out_free_unlock;
 
 	duprintf("table->private->number = %u\n",
 		 table->private->number);
-	
+
 	/* save number of initial entries */
 	table->private->initial_entries = table->private->number;
 
-	table->lock = RW_LOCK_UNLOCKED;
-	list_prepend(&ipt_tables, table);
+	list_prepend(&ve_ipt_tables, table);
 
- unlock:
 	up(&ipt_mutex);
-	return ret;
+	return 0;
 
- free_unlock:
+out_free_unlock:
+	up(&ipt_mutex);
+out_free:
 	ipt_free_table_info(newinfo);
-	goto unlock;
+out:
+	return ret;
 }
 
 void ipt_unregister_table(struct ipt_table *table)
 {
 	down(&ipt_mutex);
-	LIST_DELETE(&ipt_tables, table);
+	LIST_DELETE(&ve_ipt_tables, table);
 	up(&ipt_mutex);
 
+	/* size to uncharge taken from ipt_register_table */
+#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE)
+	uncharge_iptables(ipt_table_info_ub(table->private),
+				table->private->number);
+#endif
+
 	/* Decrease module usage counts and free resources */
 	IPT_ENTRY_ITERATE(table->private->tblentries[0], table->private->size,
 			  cleanup_entry, NULL);
@@ -1656,8 +2899,8 @@ udp_checkentry(const char *tablename,
 		return 0;
 	}
 	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
-		duprintf("ipt_udp: matchsize %u != %u\n",
-			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
+		duprintf("ipt_udp: matchsize %u != %u\n", matchinfosize,
+			(unsigned int)IPT_ALIGN(sizeof(struct ipt_udp)));
 		return 0;
 	}
 	if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
@@ -1732,6 +2975,9 @@ icmp_checkentry(const char *tablename,
 /* The built-in targets: standard (NULL) and error. */
 static struct ipt_target ipt_standard_target = {
 	.name		= IPT_STANDARD_TARGET,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat_ipt_standard_fn,
+#endif
 };
 
 static struct ipt_target ipt_error_target = {
@@ -1753,18 +2999,27 @@ static struct ipt_match tcp_matchstruct 
 	.name		= "tcp",
 	.match		= &tcp_match,
 	.checkentry	= &tcp_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &tcp_compat,
+#endif
 };
 
 static struct ipt_match udp_matchstruct = {
 	.name		= "udp",
 	.match		= &udp_match,
 	.checkentry	= &udp_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &udp_compat,
+#endif
 };
 
 static struct ipt_match icmp_matchstruct = {
 	.name		= "icmp",
 	.match		= &icmp_match,
 	.checkentry	= &icmp_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &icmp_compat,
+#endif
 };
 
 #ifdef CONFIG_PROC_FS
@@ -1790,7 +3045,7 @@ static inline int print_target(const str
                                off_t start_offset, char *buffer, int length,
                                off_t *pos, unsigned int *count)
 {
-	if (t == &ipt_standard_target || t == &ipt_error_target)
+	if (t == &ve_ipt_standard_target || t == &ve_ipt_error_target)
 		return 0;
 	return print_name((char *)t, start_offset, buffer, length, pos, count);
 }
@@ -1800,10 +3055,16 @@ static int ipt_get_tables(char *buffer, 
 	off_t pos = 0;
 	unsigned int count = 0;
 
+#ifdef CONFIG_VE_IPTABLES
+	/* if we don't initialized for current VE exiting */
+	if (&ve_ipt_standard_target == NULL)
+		return 0;
+#endif
+
 	if (down_interruptible(&ipt_mutex) != 0)
 		return 0;
 
-	LIST_FIND(&ipt_tables, print_name, void *,
+	LIST_FIND(&ve_ipt_tables, print_name, void *,
 		  offset, buffer, length, &pos, &count);
 
 	up(&ipt_mutex);
@@ -1818,10 +3079,15 @@ static int ipt_get_targets(char *buffer,
 	off_t pos = 0;
 	unsigned int count = 0;
 
+#ifdef CONFIG_VE_IPTABLES
+	/* if we don't initialized for current VE exiting */
+	if (&ve_ipt_standard_target == NULL)
+		return 0;
+#endif
 	if (down_interruptible(&ipt_mutex) != 0)
 		return 0;
 
-	LIST_FIND(&ipt_target, print_target, struct ipt_target *,
+	LIST_FIND(&ve_ipt_target, print_target, struct ipt_target *,
 		  offset, buffer, length, &pos, &count);
 	
 	up(&ipt_mutex);
@@ -1835,10 +3101,15 @@ static int ipt_get_matches(char *buffer,
 	off_t pos = 0;
 	unsigned int count = 0;
 
+#ifdef CONFIG_VE_IPTABLES
+	/* if we don't initialized for current VE exiting */
+	if (&ve_ipt_standard_target == NULL)
+		return 0;
+#endif
 	if (down_interruptible(&ipt_mutex) != 0)
 		return 0;
 	
-	LIST_FIND(&ipt_match, print_name, void *,
+	LIST_FIND(&ve_ipt_match, print_name, void *,
 		  offset, buffer, length, &pos, &count);
 
 	up(&ipt_mutex);
@@ -1854,6 +3125,7 @@ static struct { char *name; get_info_t *
   { NULL, NULL} };
 #endif /*CONFIG_PROC_FS*/
 
+void fini_iptables(void);
 static int __init init(void)
 {
 	int ret;
@@ -1898,11 +3170,132 @@ static int __init init(void)
 #endif
 
 	printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
+
+#if defined(CONFIG_VE_IPTABLES)
+	/* init ve0 */
+	ret = init_iptables();
+	if (ret == 0) {
+		KSYMRESOLVE(init_iptables);
+		KSYMRESOLVE(fini_iptables);
+		KSYMRESOLVE(ipt_flush_table);
+		KSYMMODRESOLVE(ip_tables);
+	}
+#else
+	ret = 0;
+#endif
+	return ret;
+}
+
+#ifdef CONFIG_VE_IPTABLES
+/* alloc helper */
+#define ALLOC_ENVF(field,label) \
+		if ( !(envid->field = kmalloc(sizeof(*(envid->field)), GFP_KERNEL)) ) \
+				goto label;
+int init_iptables(void)
+{
+	struct ve_struct *envid;
+
+	envid = get_exec_env();
+
+	if (ve_is_super(envid)) {
+		envid->_ipt_target = &ipt_target;
+		envid->_ipt_match = &ipt_match;
+		envid->_ipt_tables = &ipt_tables;
+
+		envid->_ipt_standard_target = &ipt_standard_target;
+		envid->_ipt_error_target = &ipt_error_target;
+		envid->_tcp_matchstruct = &tcp_matchstruct;
+		envid->_udp_matchstruct = &udp_matchstruct;
+		envid->_icmp_matchstruct = &icmp_matchstruct;
+	} else {
+		/* allocate structures in ve_struct */
+		ALLOC_ENVF(_ipt_target,nomem0);
+		ALLOC_ENVF(_ipt_match,nomem1);
+		ALLOC_ENVF(_ipt_tables,nomem2);
+		ALLOC_ENVF(_ipt_standard_target,nomem3);
+		ALLOC_ENVF(_ipt_error_target,nomem4);
+		ALLOC_ENVF(_tcp_matchstruct,nomem5);
+		ALLOC_ENVF(_udp_matchstruct,nomem6);
+		ALLOC_ENVF(_icmp_matchstruct,nomem7);
+
+		/* FIXME: charge ubc */
+		INIT_LIST_HEAD(envid->_ipt_target);
+		INIT_LIST_HEAD(envid->_ipt_match);
+		INIT_LIST_HEAD(envid->_ipt_tables);
+
+		memcpy(envid->_ipt_standard_target, &ipt_standard_target,
+						sizeof(ipt_standard_target));
+		memcpy(envid->_ipt_error_target, &ipt_error_target,
+						sizeof(ipt_error_target));
+		memcpy(envid->_tcp_matchstruct, &tcp_matchstruct,
+						sizeof(tcp_matchstruct));
+		memcpy(envid->_udp_matchstruct, &udp_matchstruct,
+						sizeof(udp_matchstruct));
+		memcpy(envid->_icmp_matchstruct, &icmp_matchstruct,
+						sizeof(icmp_matchstruct));
+
+		down(&ipt_mutex);
+		list_append(envid->_ipt_target, envid->_ipt_standard_target);
+		list_append(envid->_ipt_target, envid->_ipt_error_target);
+		list_append(envid->_ipt_match, envid->_tcp_matchstruct);
+		list_append(envid->_ipt_match, envid->_udp_matchstruct);
+		list_append(envid->_ipt_match, envid->_icmp_matchstruct);
+		up(&ipt_mutex);
+	}
+
 	return 0;
+
+nomem7:
+	kfree(envid->_udp_matchstruct); envid->_udp_matchstruct = NULL;
+nomem6:
+	kfree(envid->_tcp_matchstruct); envid->_tcp_matchstruct = NULL;
+nomem5:
+	kfree(envid->_ipt_error_target); envid->_ipt_error_target = NULL;
+nomem4:
+	kfree(envid->_ipt_standard_target); envid->_ipt_standard_target = NULL;
+nomem3:
+	kfree(envid->_ipt_tables); envid->_ipt_tables = NULL;
+nomem2:
+	kfree(envid->_ipt_match); envid->_ipt_match = NULL;
+nomem1:
+	kfree(envid->_ipt_target); envid->_ipt_target = NULL;
+nomem0:
+	return -ENOMEM;
+}
+
+void fini_iptables(void)
+{
+	/* some cleanup */
+	struct ve_struct *envid = get_exec_env();
+
+	if (envid->_ipt_tables != NULL && !ve_is_super(envid)) {
+		kfree(envid->_ipt_tables);
+		kfree(envid->_ipt_target);
+		kfree(envid->_ipt_match);
+		kfree(envid->_ipt_standard_target);
+		kfree(envid->_ipt_error_target);
+		kfree(envid->_tcp_matchstruct);
+		kfree(envid->_udp_matchstruct);
+		kfree(envid->_icmp_matchstruct);
+	}
+
+	envid->_ipt_tables = NULL;
+	envid->_ipt_target = NULL;
+	envid->_ipt_match = NULL;
+	envid->_ipt_standard_target = NULL;
+	envid->_ipt_error_target = NULL;
+	envid->_tcp_matchstruct = NULL;
+	envid->_udp_matchstruct = NULL;
+	envid->_icmp_matchstruct = NULL;
 }
+#endif
 
 static void __exit fini(void)
 {
+	KSYMMODUNRESOLVE(ip_tables);
+	KSYMUNRESOLVE(init_iptables);
+	KSYMUNRESOLVE(fini_iptables);
+	KSYMUNRESOLVE(ipt_flush_table);
 	nf_unregister_sockopt(&ipt_sockopts);
 #ifdef CONFIG_PROC_FS
 	{
@@ -1911,15 +3304,27 @@ static void __exit fini(void)
 		proc_net_remove(ipt_proc_entry[i].name);
 	}
 #endif
+#ifdef CONFIG_VE_IPTABLES
+	fini_iptables();
+#endif
 }
 
+EXPORT_SYMBOL(ipt_flush_table);
 EXPORT_SYMBOL(ipt_register_table);
 EXPORT_SYMBOL(ipt_unregister_table);
 EXPORT_SYMBOL(ipt_register_match);
 EXPORT_SYMBOL(ipt_unregister_match);
 EXPORT_SYMBOL(ipt_do_table);
+EXPORT_SYMBOL(visible_ipt_register_match);
+EXPORT_SYMBOL(visible_ipt_unregister_match);
 EXPORT_SYMBOL(ipt_register_target);
 EXPORT_SYMBOL(ipt_unregister_target);
+EXPORT_SYMBOL(visible_ipt_register_target);
+EXPORT_SYMBOL(visible_ipt_unregister_target);
+#ifdef CONFIG_COMPAT
+EXPORT_SYMBOL(ipt_match_align_compat);
+EXPORT_SYMBOL(ipt_target_align_compat);
+#endif
 
-module_init(init);
+subsys_initcall(init);
 module_exit(fini);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_CLASSIFY.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_CLASSIFY.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_CLASSIFY.c	2004-10-19 01:53:05.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_CLASSIFY.c	2011-06-15 19:26:19.000000000 +0400
@@ -48,7 +48,8 @@ checkentry(const char *tablename,
            unsigned int hook_mask)
 {
 	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_classify_target_info))){
-		printk(KERN_ERR "CLASSIFY: invalid size (%u != %Zu).\n",
+		ve_printk(VE_LOG, KERN_ERR
+				"CLASSIFY: invalid size (%u != %Zu).\n",
 		       targinfosize,
 		       IPT_ALIGN(sizeof(struct ipt_classify_target_info)));
 		return 0;
@@ -56,13 +57,14 @@ checkentry(const char *tablename,
 	
 	if (hook_mask & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
 	                  (1 << NF_IP_POST_ROUTING))) {
-		printk(KERN_ERR "CLASSIFY: only valid in LOCAL_OUT, FORWARD "
+		ve_printk(VE_LOG, KERN_ERR
+				"CLASSIFY: only valid in LOCAL_OUT, FORWARD "
 		                "and POST_ROUTING.\n");
 		return 0;
 	}
 
 	if (strcmp(tablename, "mangle") != 0) {
-		printk(KERN_ERR "CLASSIFY: can only be called from "
+		ve_printk(VE_LOG, KERN_ERR "CLASSIFY: can only be called from "
 		                "\"mangle\" table, not \"%s\".\n",
 		                tablename);
 		return 0;
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_LOG.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_LOG.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_LOG.c	2004-10-19 01:55:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_LOG.c	2011-06-15 19:26:22.000000000 +0400
@@ -18,6 +18,7 @@
 #include <net/udp.h>
 #include <net/tcp.h>
 #include <net/route.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -49,32 +50,32 @@ static void dump_packet(const struct ipt
 
 	ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
 	if (ih == NULL) {
-		printk("TRUNCATED");
+		ve_printk(VE_LOG, "TRUNCATED");
 		return;
 	}
 
 	/* Important fields:
 	 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
 	/* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
-	printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
+	ve_printk(VE_LOG, "SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
 	       NIPQUAD(ih->saddr), NIPQUAD(ih->daddr));
 
 	/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
-	printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+	ve_printk(VE_LOG, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
 	       ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
 	       ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
 
 	/* Max length: 6 "CE DF MF " */
 	if (ntohs(ih->frag_off) & IP_CE)
-		printk("CE ");
+		ve_printk(VE_LOG, "CE ");
 	if (ntohs(ih->frag_off) & IP_DF)
-		printk("DF ");
+		ve_printk(VE_LOG, "DF ");
 	if (ntohs(ih->frag_off) & IP_MF)
-		printk("MF ");
+		ve_printk(VE_LOG, "MF ");
 
 	/* Max length: 11 "FRAG:65535 " */
 	if (ntohs(ih->frag_off) & IP_OFFSET)
-		printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+		ve_printk(VE_LOG, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
 
 	if ((info->logflags & IPT_LOG_IPOPT)
 	    && ih->ihl * 4 > sizeof(struct iphdr)) {
@@ -85,15 +86,15 @@ static void dump_packet(const struct ipt
 		op = skb_header_pointer(skb, iphoff+sizeof(_iph),
 					optsize, _opt);
 		if (op == NULL) {
-			printk("TRUNCATED");
+			ve_printk(VE_LOG, "TRUNCATED");
 			return;
 		}
 
 		/* Max length: 127 "OPT (" 15*4*2chars ") " */
-		printk("OPT (");
+		ve_printk(VE_LOG, "OPT (");
 		for (i = 0; i < optsize; i++)
-			printk("%02X", op[i]);
-		printk(") ");
+			ve_printk(VE_LOG, "%02X", op[i]);
+		ve_printk(VE_LOG, ") ");
 	}
 
 	switch (ih->protocol) {
@@ -101,7 +102,7 @@ static void dump_packet(const struct ipt
 		struct tcphdr _tcph, *th;
 
 		/* Max length: 10 "PROTO=TCP " */
-		printk("PROTO=TCP ");
+		ve_printk(VE_LOG, "PROTO=TCP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -110,41 +111,41 @@ static void dump_packet(const struct ipt
 		th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
 					sizeof(_tcph), &_tcph);
 		if (th == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u ",
+		ve_printk(VE_LOG, "SPT=%u DPT=%u ",
 		       ntohs(th->source), ntohs(th->dest));
 		/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
 		if (info->logflags & IPT_LOG_TCPSEQ)
-			printk("SEQ=%u ACK=%u ",
+			ve_printk(VE_LOG, "SEQ=%u ACK=%u ",
 			       ntohl(th->seq), ntohl(th->ack_seq));
 		/* Max length: 13 "WINDOW=65535 " */
-		printk("WINDOW=%u ", ntohs(th->window));
+		ve_printk(VE_LOG, "WINDOW=%u ", ntohs(th->window));
 		/* Max length: 9 "RES=0x3F " */
-		printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
+		ve_printk(VE_LOG, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
 		/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
 		if (th->cwr)
-			printk("CWR ");
+			ve_printk(VE_LOG, "CWR ");
 		if (th->ece)
-			printk("ECE ");
+			ve_printk(VE_LOG, "ECE ");
 		if (th->urg)
-			printk("URG ");
+			ve_printk(VE_LOG, "URG ");
 		if (th->ack)
-			printk("ACK ");
+			ve_printk(VE_LOG, "ACK ");
 		if (th->psh)
-			printk("PSH ");
+			ve_printk(VE_LOG, "PSH ");
 		if (th->rst)
-			printk("RST ");
+			ve_printk(VE_LOG, "RST ");
 		if (th->syn)
-			printk("SYN ");
+			ve_printk(VE_LOG, "SYN ");
 		if (th->fin)
-			printk("FIN ");
+			ve_printk(VE_LOG, "FIN ");
 		/* Max length: 11 "URGP=65535 " */
-		printk("URGP=%u ", ntohs(th->urg_ptr));
+		ve_printk(VE_LOG, "URGP=%u ", ntohs(th->urg_ptr));
 
 		if ((info->logflags & IPT_LOG_TCPOPT)
 		    && th->doff * 4 > sizeof(struct tcphdr)) {
@@ -157,15 +158,15 @@ static void dump_packet(const struct ipt
 						iphoff+ih->ihl*4+sizeof(_tcph),
 						optsize, _opt);
 			if (op == NULL) {
-				printk("TRUNCATED");
+				ve_printk(VE_LOG, "TRUNCATED");
 				return;
 			}
 
 			/* Max length: 127 "OPT (" 15*4*2chars ") " */
-			printk("OPT (");
+			ve_printk(VE_LOG, "OPT (");
 			for (i = 0; i < optsize; i++)
-				printk("%02X", op[i]);
-			printk(") ");
+				ve_printk(VE_LOG, "%02X", op[i]);
+			ve_printk(VE_LOG, ") ");
 		}
 		break;
 	}
@@ -173,7 +174,7 @@ static void dump_packet(const struct ipt
 		struct udphdr _udph, *uh;
 
 		/* Max length: 10 "PROTO=UDP " */
-		printk("PROTO=UDP ");
+		ve_printk(VE_LOG, "PROTO=UDP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -182,13 +183,13 @@ static void dump_packet(const struct ipt
 		uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_udph), &_udph);
 		if (uh == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u LEN=%u ",
+		ve_printk(VE_LOG, "SPT=%u DPT=%u LEN=%u ",
 		       ntohs(uh->source), ntohs(uh->dest),
 		       ntohs(uh->len));
 		break;
@@ -214,7 +215,7 @@ static void dump_packet(const struct ipt
 			    [ICMP_ADDRESSREPLY] = 12 };
 
 		/* Max length: 11 "PROTO=ICMP " */
-		printk("PROTO=ICMP ");
+		ve_printk(VE_LOG, "PROTO=ICMP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -223,19 +224,19 @@ static void dump_packet(const struct ipt
 		ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
 					 sizeof(_icmph), &_icmph);
 		if (ich == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Max length: 18 "TYPE=255 CODE=255 " */
-		printk("TYPE=%u CODE=%u ", ich->type, ich->code);
+		ve_printk(VE_LOG, "TYPE=%u CODE=%u ", ich->type, ich->code);
 
 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		if (ich->type <= NR_ICMP_TYPES
 		    && required_len[ich->type]
 		    && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
@@ -244,19 +245,19 @@ static void dump_packet(const struct ipt
 		case ICMP_ECHOREPLY:
 		case ICMP_ECHO:
 			/* Max length: 19 "ID=65535 SEQ=65535 " */
-			printk("ID=%u SEQ=%u ",
+			ve_printk(VE_LOG, "ID=%u SEQ=%u ",
 			       ntohs(ich->un.echo.id),
 			       ntohs(ich->un.echo.sequence));
 			break;
 
 		case ICMP_PARAMETERPROB:
 			/* Max length: 14 "PARAMETER=255 " */
-			printk("PARAMETER=%u ",
+			ve_printk(VE_LOG, "PARAMETER=%u ",
 			       ntohl(ich->un.gateway) >> 24);
 			break;
 		case ICMP_REDIRECT:
 			/* Max length: 24 "GATEWAY=255.255.255.255 " */
-			printk("GATEWAY=%u.%u.%u.%u ",
+			ve_printk(VE_LOG, "GATEWAY=%u.%u.%u.%u ",
 			       NIPQUAD(ich->un.gateway));
 			/* Fall through */
 		case ICMP_DEST_UNREACH:
@@ -264,16 +265,16 @@ static void dump_packet(const struct ipt
 		case ICMP_TIME_EXCEEDED:
 			/* Max length: 3+maxlen */
 			if (!iphoff) { /* Only recurse once. */
-				printk("[");
+				ve_printk(VE_LOG, "[");
 				dump_packet(info, skb,
 					    iphoff + ih->ihl*4+sizeof(_icmph));
-				printk("] ");
+				ve_printk(VE_LOG, "] ");
 			}
 
 			/* Max length: 10 "MTU=65535 " */
 			if (ich->type == ICMP_DEST_UNREACH
 			    && ich->code == ICMP_FRAG_NEEDED)
-				printk("MTU=%u ", ntohs(ich->un.frag.mtu));
+				ve_printk(VE_LOG, "MTU=%u ", ntohs(ich->un.frag.mtu));
 		}
 		break;
 	}
@@ -285,26 +286,26 @@ static void dump_packet(const struct ipt
 			break;
 		
 		/* Max length: 9 "PROTO=AH " */
-		printk("PROTO=AH ");
+		ve_printk(VE_LOG, "PROTO=AH ");
 
 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_ahdr), &_ahdr);
 		if (ah == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Length: 15 "SPI=0xF1234567 " */
-		printk("SPI=0x%x ", ntohl(ah->spi));
+		ve_printk(VE_LOG, "SPI=0x%x ", ntohl(ah->spi));
 		break;
 	}
 	case IPPROTO_ESP: {
 		struct ip_esp_hdr _esph, *eh;
 
 		/* Max length: 10 "PROTO=ESP " */
-		printk("PROTO=ESP ");
+		ve_printk(VE_LOG, "PROTO=ESP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -313,18 +314,18 @@ static void dump_packet(const struct ipt
 		eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_esph), &_esph);
 		if (eh == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Length: 15 "SPI=0xF1234567 " */
-		printk("SPI=0x%x ", ntohl(eh->spi));
+		ve_printk(VE_LOG, "SPI=0x%x ", ntohl(eh->spi));
 		break;
 	}
 	/* Max length: 10 "PROTO 255 " */
 	default:
-		printk("PROTO=%u ", ih->protocol);
+		ve_printk(VE_LOG, "PROTO=%u ", ih->protocol);
 	}
 
 	/* Proto    Max log string length */
@@ -351,8 +352,8 @@ ipt_log_packet(unsigned int hooknum,
 	       const char *prefix)
 {
 	spin_lock_bh(&log_lock);
-	printk(level_string);
-	printk("%sIN=%s OUT=%s ",
+	ve_printk(VE_LOG, level_string);
+	ve_printk(VE_LOG, "%sIN=%s OUT=%s ",
 	       prefix == NULL ? loginfo->prefix : prefix,
 	       in ? in->name : "",
 	       out ? out->name : "");
@@ -362,29 +363,29 @@ ipt_log_packet(unsigned int hooknum,
 		struct net_device *physoutdev = skb->nf_bridge->physoutdev;
 
 		if (physindev && in != physindev)
-			printk("PHYSIN=%s ", physindev->name);
+			ve_printk(VE_LOG, "PHYSIN=%s ", physindev->name);
 		if (physoutdev && out != physoutdev)
-			printk("PHYSOUT=%s ", physoutdev->name);
+			ve_printk(VE_LOG, "PHYSOUT=%s ", physoutdev->name);
 	}
 #endif
 
 	if (in && !out) {
 		/* MAC logging for input chain only. */
-		printk("MAC=");
+		ve_printk(VE_LOG, "MAC=");
 		if (skb->dev && skb->dev->hard_header_len
 		    && skb->mac.raw != (void*)skb->nh.iph) {
 			int i;
 			unsigned char *p = skb->mac.raw;
 			for (i = 0; i < skb->dev->hard_header_len; i++,p++)
-				printk("%02x%c", *p,
+				ve_printk(VE_LOG, "%02x%c", *p,
 				       i==skb->dev->hard_header_len - 1
 				       ? ' ':':');
 		} else
-			printk(" ");
+			ve_printk(VE_LOG, " ");
 	}
 
 	dump_packet(loginfo, skb, 0);
-	printk("\n");
+	ve_printk(VE_LOG, "\n");
 	spin_unlock_bh(&log_lock);
 }
 
@@ -449,28 +450,62 @@ static int ipt_log_checkentry(const char
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int ipt_log_compat(void *target,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_log_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_log_info));
+	return ipt_target_align_compat(target, dstptr, size, off, convert);
+}
+#endif
+
 static struct ipt_target ipt_log_reg = {
 	.name		= "LOG",
 	.target		= ipt_log_target,
 	.checkentry	= ipt_log_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_log_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_LOG(void)
+{
+	return visible_ipt_register_target(&ipt_log_reg);
+}
+
+void fini_iptable_LOG(void)
+{
+	visible_ipt_unregister_target(&ipt_log_reg);
+}
+
 static int __init init(void)
 {
-	if (ipt_register_target(&ipt_log_reg))
-		return -EINVAL;
+	int err;
+
+	err = init_iptable_LOG();
+	if (err < 0)
+		return err;
 	if (nflog)
 		nf_log_register(PF_INET, &ipt_logfn);
-	
+
+	KSYMRESOLVE(init_iptable_LOG);
+	KSYMRESOLVE(fini_iptable_LOG);
+	KSYMMODRESOLVE(ipt_LOG);
 	return 0;
 }
 
 static void __exit fini(void)
 {
+	KSYMMODUNRESOLVE(ipt_LOG);
+	KSYMUNRESOLVE(init_iptable_LOG);
+	KSYMUNRESOLVE(fini_iptable_LOG);
 	if (nflog)
 		nf_log_unregister(PF_INET, &ipt_logfn);
-	ipt_unregister_target(&ipt_log_reg);
+	fini_iptable_LOG();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_MARK.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_MARK.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_MARK.c	2004-10-19 01:54:30.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_MARK.c	2011-06-15 19:26:19.000000000 +0400
@@ -44,14 +44,15 @@ checkentry(const char *tablename,
            unsigned int hook_mask)
 {
 	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) {
-		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
+		ve_printk(VE_LOG, KERN_WARNING "MARK: targinfosize %u != %Zu\n",
 		       targinfosize,
 		       IPT_ALIGN(sizeof(struct ipt_mark_target_info)));
 		return 0;
 	}
 
 	if (strcmp(tablename, "mangle") != 0) {
-		printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		ve_printk(VE_LOG, KERN_WARNING "MARK: can only be called from "
+				"\"mangle\" table, not \"%s\"\n", tablename);
 		return 0;
 	}
 
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_MASQUERADE.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_MASQUERADE.c	2004-10-19 01:54:37.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_MASQUERADE.c	2011-06-15 19:26:19.000000000 +0400
@@ -140,6 +140,7 @@ masquerade_target(struct sk_buff **pskb,
 	return ip_nat_setup_info(ct, &newrange, hooknum);
 }
 
+#if 0
 static inline int
 device_cmp(const struct ip_conntrack *i, void *_ina)
 {
@@ -173,6 +174,7 @@ static int masq_inet_event(struct notifi
 static struct notifier_block masq_inet_notifier = {
 	.notifier_call	= masq_inet_event,
 };
+#endif
 
 static struct ipt_target masquerade = {
 	.name		= "MASQUERADE",
@@ -187,9 +189,13 @@ static int __init init(void)
 
 	ret = ipt_register_target(&masquerade);
 
+#if 0
+/*	This notifier is unnecessary and may
+	lead to oops in virtual environments */
 	if (ret == 0)
 		/* Register IP address change reports */
 		register_inetaddr_notifier(&masq_inet_notifier);
+#endif
 
 	return ret;
 }
@@ -197,7 +203,7 @@ static int __init init(void)
 static void __exit fini(void)
 {
 	ipt_unregister_target(&masquerade);
-	unregister_inetaddr_notifier(&masq_inet_notifier);	
+/*	unregister_inetaddr_notifier(&masq_inet_notifier);	*/
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_REDIRECT.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_REDIRECT.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_REDIRECT.c	2004-10-19 01:53:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_REDIRECT.c	2011-06-15 19:26:22.000000000 +0400
@@ -17,6 +17,7 @@
 #include <linux/inetdevice.h>
 #include <net/protocol.h>
 #include <net/checksum.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 
@@ -41,7 +42,7 @@ redirect_check(const char *tablename,
 	const struct ip_nat_multi_range *mr = targinfo;
 
 	if (strcmp(tablename, "nat") != 0) {
-		DEBUGP("redirect_check: bad table `%s'.\n", table);
+		DEBUGP("redirect_check: bad table `%s'.\n", tablename);
 		return 0;
 	}
 	if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
@@ -88,14 +89,18 @@ redirect_target(struct sk_buff **pskb,
 		newdst = htonl(0x7F000001);
 	else {
 		struct in_device *indev;
+		struct in_ifaddr *ifa;
 
 		/* Device might not have an associated in_device. */
 		indev = (struct in_device *)(*pskb)->dev->ip_ptr;
 		if (indev == NULL || indev->ifa_list == NULL)
 			return NF_DROP;
 
+		ifa = indev->ifa_list;
+		if (IN_LOOPBACK(ntohl(ifa->ifa_local)) && ifa->ifa_next)
+			ifa = ifa->ifa_next;
 		/* Grab first address on interface. */
-		newdst = indev->ifa_list->ifa_local;
+		newdst = ifa->ifa_local;
 	}
 
 	/* Transfer from original range. */
@@ -108,21 +113,133 @@ redirect_target(struct sk_buff **pskb,
 	return ip_nat_setup_info(ct, &newrange, hooknum);
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *target, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_target *pt;
+	struct ip_nat_multi_range *pinfo;
+	struct compat_ip_nat_multi_range info;
+	u_int16_t tsize;
+
+	pt = (struct ipt_entry_target *)target;
+	tsize = pt->u.user.target_size;
+	if (__copy_to_user(*dstptr, pt, sizeof(struct ipt_entry_target)))
+		return -EFAULT;
+	pinfo = (struct ip_nat_multi_range *)pt->data;
+	memset(&info, 0, sizeof(struct compat_ip_nat_multi_range));
+	info.rangesize = pinfo->rangesize;
+	info.range[0].flags = pinfo->range[0].flags;
+	info.range[0].min_ip = pinfo->range[0].min_ip;
+	info.range[0].max_ip = pinfo->range[0].max_ip;
+	info.range[0].min = pinfo->range[0].min;
+	info.range[0].max = pinfo->range[0].max;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_target),
+				&info, sizeof(struct compat_ip_nat_multi_range)))
+		return -EFAULT;
+	tsize -= off;
+	if (put_user(tsize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += tsize;
+	return 0;
+}
+
+static int compat_from_user(void *target, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_target *pt;
+	struct ipt_entry_target *dstpt;
+	struct compat_ip_nat_multi_range *pinfo;
+	struct ip_nat_multi_range info;
+	u_int16_t tsize;
+
+	pt = (struct compat_ipt_entry_target *)target;
+	dstpt = (struct ipt_entry_target *)*dstptr;
+	tsize = pt->u.user.target_size;
+	memcpy(*dstptr, pt, sizeof(struct compat_ipt_entry_target));
+	pinfo = (struct compat_ip_nat_multi_range *)pt->data;
+	memset(&info, 0, sizeof(struct ip_nat_multi_range));
+	info.rangesize = pinfo->rangesize;
+	info.range[0].flags = pinfo->range[0].flags;
+	info.range[0].min_ip = pinfo->range[0].min_ip;
+	info.range[0].max_ip = pinfo->range[0].max_ip;
+	info.range[0].min = pinfo->range[0].min;
+	info.range[0].max = pinfo->range[0].max;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_target),
+				&info, sizeof(struct ip_nat_multi_range));
+	tsize += off;
+	dstpt->u.user.target_size = tsize;
+	*size += off;
+	*dstptr += tsize;
+	return 0;
+}
+
+static int redirect_compat(void *target, void **dstptr, int *size, int convert)
+{
+	int ret, off;
+
+	off = IPT_ALIGN(sizeof(struct ip_nat_multi_range)) -
+		COMPAT_IPT_ALIGN(sizeof(struct compat_ip_nat_multi_range));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = compat_to_user(target, dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = compat_from_user(target, dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 static struct ipt_target redirect_reg = {
 	.name		= "REDIRECT",
 	.target		= redirect_target,
 	.checkentry	= redirect_check,
+#ifdef CONFIG_COMPAT
+	.compat		= redirect_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_REDIRECT(void)
+{
+	return visible_ipt_register_target(&redirect_reg);
+}
+
+void fini_iptable_REDIRECT(void)
+{
+	visible_ipt_unregister_target(&redirect_reg);
+}
+
 static int __init init(void)
 {
-	return ipt_register_target(&redirect_reg);
+	int err;
+
+	err = init_iptable_REDIRECT();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_REDIRECT);
+	KSYMRESOLVE(fini_iptable_REDIRECT);
+	KSYMMODRESOLVE(ipt_REDIRECT);
+	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_target(&redirect_reg);
+	KSYMMODUNRESOLVE(ipt_REDIRECT);
+	KSYMUNRESOLVE(init_iptable_REDIRECT);
+	KSYMUNRESOLVE(fini_iptable_REDIRECT);
+	fini_iptable_REDIRECT();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_REJECT.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_REJECT.c	2004-10-19 01:54:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_REJECT.c	2011-06-15 19:26:22.000000000 +0400
@@ -22,6 +22,7 @@
 #include <net/ip.h>
 #include <net/tcp.h>
 #include <net/route.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_REJECT.h>
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -441,7 +442,7 @@ static int check(const char *tablename,
 	}
 
 	if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
-		printk("REJECT: ECHOREPLY no longer supported.\n");
+		ve_printk(VE_LOG, "REJECT: ECHOREPLY no longer supported.\n");
 		return 0;
 	} else if (rejinfo->with == IPT_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
@@ -455,21 +456,58 @@ static int check(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat(void *target,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_reject_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_reject_info));
+	return ipt_target_align_compat(target, dstptr, size, off, convert);
+}
+#endif
+
 static struct ipt_target ipt_reject_reg = {
 	.name		= "REJECT",
 	.target		= reject,
 	.checkentry	= check,
+#ifdef CONFIG_COMPAT
+	.compat		= compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_REJECT(void)
+{
+	return visible_ipt_register_target(&ipt_reject_reg);
+}
+
+void fini_iptable_REJECT(void)
+{
+	visible_ipt_unregister_target(&ipt_reject_reg);
+}
+
 static int __init init(void)
 {
-	return ipt_register_target(&ipt_reject_reg);
+	int err;
+
+	err = init_iptable_REJECT();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_REJECT);
+	KSYMRESOLVE(fini_iptable_REJECT);
+	KSYMMODRESOLVE(ipt_REJECT);
+	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_target(&ipt_reject_reg);
+	KSYMMODUNRESOLVE(ipt_REJECT);
+	KSYMUNRESOLVE(init_iptable_REJECT);
+	KSYMUNRESOLVE(fini_iptable_REJECT);
+	fini_iptable_REJECT();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_TCPMSS.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_TCPMSS.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_TCPMSS.c	2004-10-19 01:54:25.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_TCPMSS.c	2011-06-15 19:26:22.000000000 +0400
@@ -13,6 +13,7 @@
 
 #include <linux/ip.h>
 #include <net/tcp.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_TCPMSS.h>
@@ -228,7 +229,8 @@ ipt_tcpmss_checkentry(const char *tablen
 			((hook_mask & ~((1 << NF_IP_FORWARD)
 			   	| (1 << NF_IP_LOCAL_OUT)
 			   	| (1 << NF_IP_POST_ROUTING))) != 0)) {
-		printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
+		ve_printk(VE_LOG, "TCPMSS: path-MTU clamping only supported in "
+				"FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return 0;
 	}
 
@@ -237,25 +239,62 @@ ipt_tcpmss_checkentry(const char *tablen
 	    && IPT_MATCH_ITERATE(e, find_syn_match))
 		return 1;
 
-	printk("TCPMSS: Only works on TCP SYN packets\n");
+	ve_printk(VE_LOG, "TCPMSS: Only works on TCP SYN packets\n");
 	return 0;
 }
 
+#ifdef CONFIG_COMPAT
+static int ipt_tcpmss_compat(void *target,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_tcpmss_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_tcpmss_info));
+	return ipt_target_align_compat(target, dstptr, size, off, convert);
+}
+#endif
+
 static struct ipt_target ipt_tcpmss_reg = {
 	.name		= "TCPMSS",
 	.target		= ipt_tcpmss_target,
 	.checkentry	= ipt_tcpmss_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_tcpmss_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_TCPMSS(void)
+{
+	return visible_ipt_register_target(&ipt_tcpmss_reg);
+}
+
+void fini_iptable_TCPMSS(void)
+{
+	visible_ipt_unregister_target(&ipt_tcpmss_reg);
+}
+
 static int __init init(void)
 {
-	return ipt_register_target(&ipt_tcpmss_reg);
+	int err;
+
+	err = init_iptable_TCPMSS();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_TCPMSS);
+	KSYMRESOLVE(fini_iptable_TCPMSS);
+	KSYMMODRESOLVE(ipt_TCPMSS);
+	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_target(&ipt_tcpmss_reg);
+	KSYMMODUNRESOLVE(ipt_TCPMSS);
+	KSYMUNRESOLVE(init_iptable_TCPMSS);
+	KSYMUNRESOLVE(fini_iptable_TCPMSS);
+	fini_iptable_TCPMSS();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_TOS.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_TOS.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_TOS.c	2004-10-19 01:53:09.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_TOS.c	2011-06-15 19:26:22.000000000 +0400
@@ -15,6 +15,7 @@
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_TOS.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -61,14 +62,15 @@ checkentry(const char *tablename,
 	const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
 
 	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tos_target_info))) {
-		printk(KERN_WARNING "TOS: targinfosize %u != %Zu\n",
+		ve_printk(VE_LOG, KERN_WARNING "TOS: targinfosize %u != %Zu\n",
 		       targinfosize,
 		       IPT_ALIGN(sizeof(struct ipt_tos_target_info)));
 		return 0;
 	}
 
 	if (strcmp(tablename, "mangle") != 0) {
-		printk(KERN_WARNING "TOS: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		ve_printk(VE_LOG, KERN_WARNING "TOS: can only be called from "
+				"\"mangle\" table, not \"%s\"\n", tablename);
 		return 0;
 	}
 
@@ -77,28 +79,65 @@ checkentry(const char *tablename,
 	    && tos != IPTOS_RELIABILITY
 	    && tos != IPTOS_MINCOST
 	    && tos != IPTOS_NORMALSVC) {
-		printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
+		ve_printk(VE_LOG, KERN_WARNING "TOS: bad tos value %#x\n", tos);
 		return 0;
 	}
 
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat(void *target,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_tos_target_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_target_info));
+	return ipt_target_align_compat(target, dstptr, size, off, convert);
+}
+#endif
+
 static struct ipt_target ipt_tos_reg = {
 	.name		= "TOS",
 	.target		= target,
 	.checkentry	= checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_TOS(void)
+{
+	return visible_ipt_register_target(&ipt_tos_reg);
+}
+
+void fini_iptable_TOS(void)
+{
+	visible_ipt_unregister_target(&ipt_tos_reg);
+}
+
 static int __init init(void)
 {
-	return ipt_register_target(&ipt_tos_reg);
+	int err;
+
+	err = init_iptable_TOS();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_TOS);
+	KSYMRESOLVE(fini_iptable_TOS);
+	KSYMMODRESOLVE(ipt_TOS);
+	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_target(&ipt_tos_reg);
+	KSYMMODUNRESOLVE(ipt_TOS);
+	KSYMUNRESOLVE(init_iptable_TOS);
+	KSYMUNRESOLVE(fini_iptable_TOS);
+	fini_iptable_TOS();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_ULOG.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_ULOG.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_ULOG.c	2004-10-19 01:53:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_ULOG.c	2011-06-15 19:26:19.000000000 +0400
@@ -129,6 +129,9 @@ static void ulog_send(unsigned int nlgro
 /* timer function to flush queue in flushtimeout time */
 static void ulog_timer(unsigned long data)
 {
+#ifdef CONFIG_VE
+#error timer context should be evaluated
+#endif
 	DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n");
 
 	/* lock to protect against somebody modifying our structure
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_conntrack.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_conntrack.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_conntrack.c	2004-10-19 01:55:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_conntrack.c	2011-06-15 19:26:22.000000000 +0400
@@ -114,10 +114,112 @@ static int check(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_match *pm;
+	struct ipt_conntrack_info *pinfo;
+	struct compat_ipt_conntrack_info info;
+	u_int16_t msize;
+
+	pm = (struct ipt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct ipt_conntrack_info *)pm->data;
+	memset(&info, 0, sizeof(struct compat_ipt_conntrack_info));
+	info.statemask = pinfo->statemask;
+	info.statusmask = pinfo->statusmask;
+	memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX *
+			sizeof(struct ip_conntrack_tuple));
+	memcpy(info.sipmsk, pinfo->sipmsk,
+			IP_CT_DIR_MAX * sizeof(struct in_addr));
+	memcpy(info.dipmsk, pinfo->dipmsk,
+			IP_CT_DIR_MAX * sizeof(struct in_addr));
+	info.expires_min = pinfo->expires_min;
+	info.expires_max = pinfo->expires_max;
+	info.flags = pinfo->flags;
+	info.invflags = pinfo->invflags;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
+				&info, sizeof(struct compat_ipt_conntrack_info)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_match *pm;
+	struct ipt_entry_match *dstpm;
+	struct compat_ipt_conntrack_info *pinfo;
+	struct ipt_conntrack_info info;
+	u_int16_t msize;
+
+	pm = (struct compat_ipt_entry_match *)match;
+	dstpm = (struct ipt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
+	pinfo = (struct compat_ipt_conntrack_info *)pm->data;
+	memset(&info, 0, sizeof(struct ipt_conntrack_info));
+	info.statemask = pinfo->statemask;
+	info.statusmask = pinfo->statusmask;
+	memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX *
+			sizeof(struct ip_conntrack_tuple));
+	memcpy(info.sipmsk, pinfo->sipmsk,
+			IP_CT_DIR_MAX * sizeof(struct in_addr));
+	memcpy(info.dipmsk, pinfo->dipmsk,
+			IP_CT_DIR_MAX * sizeof(struct in_addr));
+	info.expires_min = pinfo->expires_min;
+	info.expires_max = pinfo->expires_max;
+	info.flags = pinfo->flags;
+	info.invflags = pinfo->invflags;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
+				&info, sizeof(struct ipt_conntrack_info));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int ret, off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_conntrack_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_conntrack_info));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = compat_to_user(match, dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = compat_from_user(match, dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 static struct ipt_match conntrack_match = {
 	.name		= "conntrack",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_helper.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_helper.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_helper.c	2004-10-19 01:55:36.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_helper.c	2011-06-15 19:26:22.000000000 +0400
@@ -18,6 +18,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_helper.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
@@ -98,22 +99,126 @@ static int check(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_match *pm;
+	struct ipt_helper_info *pinfo;
+	struct compat_ipt_helper_info info;
+	u_int16_t msize;
+
+	pm = (struct ipt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct ipt_helper_info *)pm->data;
+	memset(&info, 0, sizeof(struct compat_ipt_helper_info));
+	info.invert = pinfo->invert;
+	memcpy(info.name, pinfo->name, 30);
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
+				&info, sizeof(struct compat_ipt_helper_info)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_match *pm;
+	struct ipt_entry_match *dstpm;
+	struct compat_ipt_helper_info *pinfo;
+	struct ipt_helper_info info;
+	u_int16_t msize;
+
+	pm = (struct compat_ipt_entry_match *)match;
+	dstpm = (struct ipt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
+	pinfo = (struct compat_ipt_helper_info *)pm->data;
+	memset(&info, 0, sizeof(struct ipt_helper_info));
+	info.invert = pinfo->invert;
+	memcpy(info.name, pinfo->name, 30);
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
+				&info, sizeof(struct ipt_helper_info));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int ret, off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_helper_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_helper_info));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = compat_to_user(match, dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = compat_from_user(match, dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 static struct ipt_match helper_match = {
 	.name		= "helper",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_helper(void)
+{
+	return visible_ipt_register_match(&helper_match);
+}
+
+void fini_iptable_helper(void)
+{
+	visible_ipt_unregister_match(&helper_match);
+}
+
 static int __init init(void)
 {
+	int err;
+
 	need_ip_conntrack();
-	return ipt_register_match(&helper_match);
+	err = init_iptable_helper();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_helper);
+	KSYMRESOLVE(fini_iptable_helper);
+	KSYMMODRESOLVE(ipt_helper);
+	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&helper_match);
+	KSYMMODUNRESOLVE(ipt_helper);
+	KSYMUNRESOLVE(init_iptable_helper);
+	KSYMUNRESOLVE(fini_iptable_helper);
+	fini_iptable_helper();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_length.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_length.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_length.c	2004-10-19 01:54:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_length.c	2011-06-15 19:26:22.000000000 +0400
@@ -8,6 +8,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ipt_length.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -43,21 +44,58 @@ checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_length_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_length_info));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+#endif
+
 static struct ipt_match length_match = {
 	.name		= "length",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_length(void)
+{
+	return visible_ipt_register_match(&length_match);
+}
+
+void fini_iptable_length(void)
+{
+	visible_ipt_unregister_match(&length_match);
+}
+
 static int __init init(void)
 {
-	return ipt_register_match(&length_match);
+	int err;
+
+	err = init_iptable_length();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_length);
+	KSYMRESOLVE(fini_iptable_length);
+	KSYMMODRESOLVE(ipt_length);
+	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&length_match);
+	KSYMMODUNRESOLVE(ipt_length);
+	KSYMUNRESOLVE(init_iptable_length);
+	KSYMUNRESOLVE(fini_iptable_length);
+	fini_iptable_length();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_limit.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_limit.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_limit.c	2004-10-19 01:53:09.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_limit.c	2011-06-15 19:26:22.000000000 +0400
@@ -17,6 +17,7 @@
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_limit.h>
@@ -25,6 +26,13 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
 MODULE_DESCRIPTION("iptables rate limit match");
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ipt_limit_reg	(*(get_exec_env()->_ipt_limit_reg))
+#else
+#define ve_ipt_limit_reg	ipt_limit_reg
+#endif
+
 /* The algorithm used is the Simple Token Bucket Filter (TBF)
  * see net/sched/sch_tbf.c in the linux source tree
  */
@@ -116,7 +124,7 @@ ipt_limit_checkentry(const char *tablena
 	/* Check for overflow. */
 	if (r->burst == 0
 	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
-		printk("Overflow in ipt_limit, try lower: %u/%u\n",
+		ve_printk(VE_LOG, "Overflow in ipt_limit, try lower: %u/%u\n",
 		       r->avg, r->burst);
 		return 0;
 	}
@@ -134,23 +142,128 @@ ipt_limit_checkentry(const char *tablena
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int ipt_limit_compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_match *pm;
+	struct ipt_rateinfo *pinfo;
+	struct compat_ipt_rateinfo rinfo;
+	u_int16_t msize;
+
+	pm = (struct ipt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct ipt_rateinfo *)pm->data;
+	memset(&rinfo, 0, sizeof(struct compat_ipt_rateinfo));
+	rinfo.avg = pinfo->avg;
+	rinfo.burst = pinfo->burst;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
+				&rinfo, sizeof(struct compat_ipt_rateinfo)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int ipt_limit_compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_match *pm;
+	struct ipt_entry_match *dstpm;
+	struct compat_ipt_rateinfo *pinfo;
+	struct ipt_rateinfo rinfo;
+	u_int16_t msize;
+
+	pm = (struct compat_ipt_entry_match *)match;
+	dstpm = (struct ipt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
+	pinfo = (struct compat_ipt_rateinfo *)pm->data;
+	memset(&rinfo, 0, sizeof(struct ipt_rateinfo));
+	rinfo.avg = pinfo->avg;
+	rinfo.burst = pinfo->burst;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
+				&rinfo, sizeof(struct ipt_rateinfo));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int ipt_limit_compat(void *match, void **dstptr,
+		int *size, int convert)
+{
+	int ret, off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_rateinfo)) -
+		COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_rateinfo));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = ipt_limit_compat_to_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = ipt_limit_compat_from_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 static struct ipt_match ipt_limit_reg = {
 	.name		= "limit",
 	.match		= ipt_limit_match,
 	.checkentry	= ipt_limit_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_limit_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_limit(void)
+{
+	return visible_ipt_register_match(&ipt_limit_reg);
+}
+
+void fini_iptable_limit(void)
+{
+	visible_ipt_unregister_match(&ipt_limit_reg);
+}
+
 static int __init init(void)
 {
-	if (ipt_register_match(&ipt_limit_reg))
-		return -EINVAL;
+	int err;
+
+	err = init_iptable_limit();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_limit);
+	KSYMRESOLVE(fini_iptable_limit);
+	KSYMMODRESOLVE(ipt_limit);
 	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&ipt_limit_reg);
+	KSYMMODUNRESOLVE(ipt_limit);
+	KSYMUNRESOLVE(init_iptable_limit);
+	KSYMUNRESOLVE(fini_iptable_limit);
+	fini_iptable_limit();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_mac.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_mac.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_mac.c	2004-10-19 01:53:13.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_mac.c	2011-06-15 19:26:19.000000000 +0400
@@ -48,7 +48,8 @@ ipt_mac_checkentry(const char *tablename
 	if (hook_mask
 	    & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
 		| (1 << NF_IP_FORWARD))) {
-		printk("ipt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
+		ve_printk(VE_LOG, "ipt_mac: only valid for PRE_ROUTING, "
+				"LOCAL_IN or FORWARD.\n");
 		return 0;
 	}
 
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_multiport.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_multiport.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_multiport.c	2004-10-19 01:53:23.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_multiport.c	2011-06-15 19:26:22.000000000 +0400
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 #include <linux/udp.h>
 #include <linux/skbuff.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ipt_multiport.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -21,6 +22,13 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables multiple port match module");
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_multiport_match	(*(get_exec_env()->_multiport_match))
+#else
+#define ve_multiport_match	multiport_match
+#endif
+
 #if 0
 #define duprintf(format, args...) printk(format , ## args)
 #else
@@ -103,21 +111,58 @@ checkentry(const char *tablename,
 		&& multiinfo->count <= IPT_MULTI_PORTS;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_multiport)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_multiport));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+#endif
+
 static struct ipt_match multiport_match = {
 	.name		= "multiport",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_multiport(void)
+{
+	return visible_ipt_register_match(&multiport_match);
+}
+
+void fini_iptable_multiport(void)
+{
+	visible_ipt_unregister_match(&multiport_match);
+}
+
 static int __init init(void)
 {
-	return ipt_register_match(&multiport_match);
+	int err;
+
+	err = init_iptable_multiport();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_multiport);
+	KSYMRESOLVE(fini_iptable_multiport);
+	KSYMMODRESOLVE(ipt_multiport);
+	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&multiport_match);
+	KSYMMODUNRESOLVE(ipt_multiport);
+	KSYMUNRESOLVE(init_iptable_multiport);
+	KSYMUNRESOLVE(fini_iptable_multiport);
+	fini_iptable_multiport();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_owner.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_owner.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_owner.c	2004-10-19 01:55:24.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_owner.c	2011-06-15 19:26:19.000000000 +0400
@@ -12,6 +12,7 @@
 #include <linux/skbuff.h>
 #include <linux/file.h>
 #include <net/sock.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ipt_owner.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -23,12 +24,13 @@ MODULE_DESCRIPTION("iptables owner match
 static int
 match_comm(const struct sk_buff *skb, const char *comm)
 {
+#ifndef CONFIG_VE
 	struct task_struct *g, *p;
 	struct files_struct *files;
 	int i;
 
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_ve(g, p) {
 		if(strncmp(p->comm, comm, sizeof(p->comm)))
 			continue;
 
@@ -48,20 +50,22 @@ match_comm(const struct sk_buff *skb, co
 			spin_unlock(&files->file_lock);
 		}
 		task_unlock(p);
-	} while_each_thread(g, p);
+	} while_each_thread_ve(g, p);
 	read_unlock(&tasklist_lock);
+#endif
 	return 0;
 }
 
 static int
 match_pid(const struct sk_buff *skb, pid_t pid)
 {
+#ifndef CONFIG_VE
 	struct task_struct *p;
 	struct files_struct *files;
 	int i;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	if (!p)
 		goto out;
 	task_lock(p);
@@ -82,18 +86,20 @@ match_pid(const struct sk_buff *skb, pid
 	task_unlock(p);
 out:
 	read_unlock(&tasklist_lock);
+#endif
 	return 0;
 }
 
 static int
 match_sid(const struct sk_buff *skb, pid_t sid)
 {
+#ifndef CONFIG_VE
 	struct task_struct *g, *p;
 	struct file *file = skb->sk->sk_socket->file;
 	int i, found=0;
 
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_ve(g, p) {
 		struct files_struct *files;
 		if (p->signal->session != sid)
 			continue;
@@ -113,11 +119,14 @@ match_sid(const struct sk_buff *skb, pid
 		task_unlock(p);
 		if (found)
 			goto out;
-	} while_each_thread(g, p);
+	} while_each_thread_ve(g, p);
 out:
 	read_unlock(&tasklist_lock);
 
 	return found;
+#else
+	return 0;
+#endif
 }
 
 static int
@@ -203,14 +212,36 @@ static struct ipt_match owner_match = {
 	.me		= THIS_MODULE,
 };
 
+static int init_iptable_owner(void)
+{
+	return visible_ipt_register_match(&owner_match);
+}
+
+static void fini_iptable_owner(void)
+{
+	visible_ipt_unregister_match(&owner_match);
+}
+
 static int __init init(void)
 {
-	return ipt_register_match(&owner_match);
+	int err;
+
+	err = init_iptable_owner();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_owner);
+	KSYMRESOLVE(fini_iptable_owner);
+	KSYMMODRESOLVE(ipt_owner);
+	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&owner_match);
+	KSYMMODUNRESOLVE(ipt_owner);
+	KSYMUNRESOLVE(init_iptable_owner);
+	KSYMUNRESOLVE(fini_iptable_owner);
+	fini_iptable_owner();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_state.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_state.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_state.c	2004-10-19 01:55:35.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_state.c	2011-06-15 19:26:22.000000000 +0400
@@ -10,6 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_state.h>
@@ -52,22 +53,124 @@ static int check(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_match *pm;
+	struct ipt_state_info *pinfo;
+	struct compat_ipt_state_info info;
+	u_int16_t msize;
+
+	pm = (struct ipt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct ipt_state_info *)pm->data;
+	memset(&info, 0, sizeof(struct compat_ipt_state_info));
+	info.statemask = pinfo->statemask;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
+				&info, sizeof(struct compat_ipt_state_info)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_match *pm;
+	struct ipt_entry_match *dstpm;
+	struct compat_ipt_state_info *pinfo;
+	struct ipt_state_info info;
+	u_int16_t msize;
+
+	pm = (struct compat_ipt_entry_match *)match;
+	dstpm = (struct ipt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
+	pinfo = (struct compat_ipt_state_info *)pm->data;
+	memset(&info, 0, sizeof(struct ipt_state_info));
+	info.statemask = pinfo->statemask;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
+				&info, sizeof(struct ipt_state_info));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int ret, off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_state_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_state_info));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = compat_to_user(match, dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = compat_from_user(match, dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 static struct ipt_match state_match = {
 	.name		= "state",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_state(void)
+{
+	return visible_ipt_register_match(&state_match);
+}
+
+void fini_iptable_state(void)
+{
+	visible_ipt_unregister_match(&state_match);
+}
+
 static int __init init(void)
 {
+	int err;
+
 	need_ip_conntrack();
-	return ipt_register_match(&state_match);
+	err = init_iptable_state();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_state);
+	KSYMRESOLVE(fini_iptable_state);
+	KSYMMODRESOLVE(ipt_state);
+	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&state_match);
+	KSYMMODUNRESOLVE(ipt_state);
+	KSYMUNRESOLVE(init_iptable_state);
+	KSYMUNRESOLVE(fini_iptable_state);
+	fini_iptable_state();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_tcpmss.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_tcpmss.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_tcpmss.c	2004-10-19 01:53:21.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_tcpmss.c	2011-06-15 19:26:22.000000000 +0400
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <net/tcp.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ipt_tcpmss.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -111,28 +112,65 @@ checkentry(const char *tablename,
 
 	/* Must specify -p tcp */
 	if (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO)) {
-		printk("tcpmss: Only works on TCP packets\n");
+		ve_printk(VE_LOG, "tcpmss: Only works on TCP packets\n");
 		return 0;
 	}
 
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_tcpmss_match_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_tcpmss_match_info));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+#endif
+
 static struct ipt_match tcpmss_match = {
 	.name		= "tcpmss",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_tcpmss(void)
+{
+	return visible_ipt_register_match(&tcpmss_match);
+}
+
+void fini_iptable_tcpmss(void)
+{
+	visible_ipt_unregister_match(&tcpmss_match);
+}
+
 static int __init init(void)
 {
-	return ipt_register_match(&tcpmss_match);
+	int err;
+
+	err = init_iptable_tcpmss();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_tcpmss);
+	KSYMRESOLVE(fini_iptable_tcpmss);
+	KSYMMODRESOLVE(ipt_tcpmss);
+	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&tcpmss_match);
+	KSYMMODUNRESOLVE(ipt_tcpmss);
+	KSYMUNRESOLVE(init_iptable_tcpmss);
+	KSYMUNRESOLVE(fini_iptable_tcpmss);
+	fini_iptable_tcpmss();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_tos.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_tos.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_tos.c	2004-10-19 01:55:28.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_tos.c	2011-06-15 19:26:22.000000000 +0400
@@ -10,6 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ipt_tos.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -17,6 +18,13 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("iptables TOS match module");
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_tos_match		(*(get_exec_env()->_tos_match))
+#else
+#define ve_tos_match		tos_match
+#endif
+
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -43,21 +51,58 @@ checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_tos_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_info));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+#endif
+
 static struct ipt_match tos_match = {
 	.name		= "tos",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_tos(void)
+{
+	return visible_ipt_register_match(&tos_match);
+}
+
+void fini_iptable_tos(void)
+{
+	visible_ipt_unregister_match(&tos_match);
+}
+
 static int __init init(void)
 {
-	return ipt_register_match(&tos_match);
+	int err;
+
+	err = init_iptable_tos();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_tos);
+	KSYMRESOLVE(fini_iptable_tos);
+	KSYMMODRESOLVE(ipt_tos);
+	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&tos_match);
+	KSYMMODUNRESOLVE(ipt_tos);
+	KSYMUNRESOLVE(init_iptable_tos);
+	KSYMUNRESOLVE(fini_iptable_tos);
+	fini_iptable_tos();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_ttl.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_ttl.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/ipt_ttl.c	2004-10-19 01:55:35.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/ipt_ttl.c	2011-06-15 19:26:22.000000000 +0400
@@ -11,6 +11,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ipt_ttl.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -57,22 +58,58 @@ static int checkentry(const char *tablen
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_ttl_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_ttl_info));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+#endif
+
 static struct ipt_match ttl_match = {
 	.name		= "ttl",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
+int init_iptable_ttl(void)
+{
+	return visible_ipt_register_match(&ttl_match);
+}
+
+void fini_iptable_ttl(void)
+{
+	visible_ipt_unregister_match(&ttl_match);
+}
+
 static int __init init(void)
 {
-	return ipt_register_match(&ttl_match);
+	int err;
+
+	err = init_iptable_ttl();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_ttl);
+	KSYMRESOLVE(fini_iptable_ttl);
+	KSYMMODRESOLVE(ipt_ttl);
+	return 0;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&ttl_match);
-
+	KSYMMODUNRESOLVE(ipt_ttl);
+	KSYMUNRESOLVE(init_iptable_ttl);
+	KSYMUNRESOLVE(fini_iptable_ttl);
+	fini_iptable_ttl();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/iptable_filter.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/iptable_filter.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/iptable_filter.c	2004-10-19 01:53:50.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/iptable_filter.c	2011-06-15 19:26:19.000000000 +0400
@@ -12,12 +12,23 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
+#include <ub/ub_mem.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables filter table");
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_packet_filter	(*(get_exec_env()->_ve_ipt_filter_pf))
+#define ve_ipt_ops		(get_exec_env()->_ve_ipt_filter_io)
+#else
+#define	ve_packet_filter	packet_filter
+#define ve_ipt_ops		ipt_ops
+#endif
+
 #define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))
 
 /* Standard entry. */
@@ -39,12 +50,12 @@ struct ipt_error
 	struct ipt_error_target target;
 };
 
-static struct
+static struct ipt_filter_initial_table
 {
 	struct ipt_replace repl;
 	struct ipt_standard entries[3];
 	struct ipt_error term;
-} initial_table __initdata
+} initial_table
 = { { "filter", FILTER_VALID_HOOKS, 4,
       sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
       { [NF_IP_LOCAL_IN] = 0,
@@ -109,7 +120,7 @@ ipt_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ipt_do_table(pskb, hook, in, out, &ve_packet_filter, NULL);
 }
 
 static unsigned int
@@ -127,7 +138,7 @@ ipt_local_out_hook(unsigned int hook,
 		return NF_ACCEPT;
 	}
 
-	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ipt_do_table(pskb, hook, in, out, &ve_packet_filter, NULL);
 }
 
 static struct nf_hook_ops ipt_ops[] = {
@@ -158,56 +169,161 @@ static struct nf_hook_ops ipt_ops[] = {
 static int forward = NF_ACCEPT;
 module_param(forward, bool, 0000);
 
-static int __init init(void)
+#ifdef CONFIG_VE_IPTABLES
+static void init_ve0_iptable_filter(struct ve_struct *envid)
+{
+	envid->_ipt_filter_initial_table = &initial_table;
+	envid->_ve_ipt_filter_pf = &packet_filter;
+	envid->_ve_ipt_filter_io = ipt_ops;
+}
+#endif
+
+int init_iptable_filter(void)
 {
 	int ret;
+#ifdef CONFIG_VE_IPTABLES
+	struct ve_struct *envid;
 
-	if (forward < 0 || forward > NF_MAX_VERDICT) {
-		printk("iptables forward must be 0 or 1\n");
-		return -EINVAL;
-	}
+	envid = get_exec_env();
 
-	/* Entry 1 is the FORWARD hook */
-	initial_table.entries[1].target.verdict = -forward - 1;
+	if (ve_is_super(envid)) {
+		init_ve0_iptable_filter(envid);
+	} else {
+		__module_get(THIS_MODULE);
+		ret = -ENOMEM;
+		envid->_ipt_filter_initial_table =
+				ub_kmalloc(sizeof(initial_table), GFP_KERNEL);
+		if (!envid->_ipt_filter_initial_table)
+			goto nomem_1;
+		envid->_ve_ipt_filter_pf =
+				ub_kmalloc(sizeof(packet_filter), GFP_KERNEL);
+		if (!envid->_ve_ipt_filter_pf)
+			goto nomem_2;
+		envid->_ve_ipt_filter_io =
+				ub_kmalloc(sizeof(ipt_ops), GFP_KERNEL);
+		if (!envid->_ve_ipt_filter_io)
+			goto nomem_3;
+
+		/*
+		 * Note: in general, it isn't safe to copy the static table
+		 * used for VE0, since that table is already registered
+		 * and now has some run-time information.
+		 * However, inspection of ip_tables.c shows that the only
+		 * dynamically changed fields `list' and `private' are
+		 * given new values in ipt_register_table() without looking
+		 * at the old values.  2004/06/01  SAW
+		 */
+		memcpy(envid->_ipt_filter_initial_table, &initial_table,
+				sizeof(initial_table));
+		memcpy(envid->_ve_ipt_filter_pf, &packet_filter,
+				sizeof(packet_filter));
+		memcpy(envid->_ve_ipt_filter_io, &ipt_ops[0], sizeof(ipt_ops));
+
+		envid->_ve_ipt_filter_pf->table =
+				&envid->_ipt_filter_initial_table->repl;
+	}
+#endif
 
 	/* Register table */
-	ret = ipt_register_table(&packet_filter);
+	ret = ipt_register_table(&ve_packet_filter);
 	if (ret < 0)
-		return ret;
+		goto nomem_4;
 
 	/* Register hooks */
-	ret = nf_register_hook(&ipt_ops[0]);
+	ret = nf_register_hook(&ve_ipt_ops[0]);
 	if (ret < 0)
 		goto cleanup_table;
 
-	ret = nf_register_hook(&ipt_ops[1]);
+	ret = nf_register_hook(&ve_ipt_ops[1]);
 	if (ret < 0)
 		goto cleanup_hook0;
 
-	ret = nf_register_hook(&ipt_ops[2]);
+	ret = nf_register_hook(&ve_ipt_ops[2]);
 	if (ret < 0)
 		goto cleanup_hook1;
 
 	return ret;
 
  cleanup_hook1:
-	nf_unregister_hook(&ipt_ops[1]);
+	nf_unregister_hook(&ve_ipt_ops[1]);
  cleanup_hook0:
-	nf_unregister_hook(&ipt_ops[0]);
+	nf_unregister_hook(&ve_ipt_ops[0]);
  cleanup_table:
-	ipt_unregister_table(&packet_filter);
-
+	ipt_unregister_table(&ve_packet_filter);
+ nomem_4:
+#ifdef CONFIG_VE_IPTABLES
+	if (!ve_is_super(envid))
+		kfree(envid->_ve_ipt_filter_io);
+	envid->_ve_ipt_filter_io = NULL;
+ nomem_3:
+	if (!ve_is_super(envid))
+		kfree(envid->_ve_ipt_filter_pf);
+	envid->_ve_ipt_filter_pf = NULL;
+ nomem_2:
+	if (!ve_is_super(envid))
+		kfree(envid->_ipt_filter_initial_table);
+	envid->_ipt_filter_initial_table = NULL;
+ nomem_1:
+	if (!ve_is_super(envid))
+		module_put(THIS_MODULE);
+#endif
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_iptable_filter(void)
 {
 	unsigned int i;
+#ifdef CONFIG_VE_IPTABLES
+	struct ve_struct *envid;
+#endif
 
 	for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
-		nf_unregister_hook(&ipt_ops[i]);
+		nf_unregister_hook(&ve_ipt_ops[i]);
+
+	ipt_unregister_table(&ve_packet_filter);
+
+#ifdef CONFIG_VE_IPTABLES
+	envid = get_exec_env();
+	if (envid->_ipt_filter_initial_table != NULL && !ve_is_super(envid)) {
+		kfree(envid->_ipt_filter_initial_table);
+		kfree(envid->_ve_ipt_filter_pf);
+		kfree(envid->_ve_ipt_filter_io);
+		module_put(THIS_MODULE);
+	}
+	envid->_ipt_filter_initial_table = NULL;
+	envid->_ve_ipt_filter_pf = NULL;
+	envid->_ve_ipt_filter_io = NULL; 
+#endif
+}
+
+static int __init init(void)
+{
+	int err;
 
-	ipt_unregister_table(&packet_filter);
+	if (forward < 0 || forward > NF_MAX_VERDICT) {
+		printk("iptables forward must be 0 or 1\n");
+		return -EINVAL;
+	}
+
+	/* Entry 1 is the FORWARD hook */
+	initial_table.entries[1].target.verdict = -forward - 1;
+
+	err = init_iptable_filter();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_filter);
+	KSYMRESOLVE(fini_iptable_filter);
+	KSYMMODRESOLVE(iptable_filter);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(iptable_filter);
+	KSYMUNRESOLVE(init_iptable_filter);
+	KSYMUNRESOLVE(fini_iptable_filter);
+	fini_iptable_filter();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/netfilter/iptable_mangle.c linux-2.6.9-ve023stab054/net/ipv4/netfilter/iptable_mangle.c
--- linux-2.6.9-100.orig/net/ipv4/netfilter/iptable_mangle.c	2004-10-19 01:55:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/netfilter/iptable_mangle.c	2011-06-15 19:26:19.000000000 +0400
@@ -17,6 +17,7 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/route.h>
+#include <linux/nfcalls.h>
 #include <linux/ip.h>
 
 MODULE_LICENSE("GPL");
@@ -54,7 +55,7 @@ static struct
 	struct ipt_replace repl;
 	struct ipt_standard entries[5];
 	struct ipt_error term;
-} initial_table __initdata
+} initial_table
 = { { "mangle", MANGLE_VALID_HOOKS, 6,
       sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
       { [NF_IP_PRE_ROUTING] 	= 0,
@@ -131,6 +132,13 @@ static struct ipt_table packet_mangler =
 	.me		= THIS_MODULE,
 };
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_packet_mangler	(*(get_exec_env()->_ipt_mangle_table))
+#else
+#define ve_packet_mangler	packet_mangler
+#endif
+
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ipt_route_hook(unsigned int hook,
@@ -139,7 +147,7 @@ ipt_route_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	return ipt_do_table(pskb, hook, in, out, &ve_packet_mangler, NULL);
 }
 
 static unsigned int
@@ -168,7 +176,8 @@ ipt_local_hook(unsigned int hook,
 	daddr = (*pskb)->nh.iph->daddr;
 	tos = (*pskb)->nh.iph->tos;
 
-	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	ret = ipt_do_table(pskb, hook, in, out, &ve_packet_mangler, NULL);
+
 	/* Reroute for ANY change. */
 	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
 	    && ((*pskb)->nh.iph->saddr != saddr
@@ -220,12 +229,12 @@ static struct nf_hook_ops ipt_ops[] = {
 	},
 };
 
-static int __init init(void)
+static int mangle_init(struct ipt_table *packet_mangler, struct nf_hook_ops ipt_ops[])
 {
 	int ret;
 
 	/* Register table */
-	ret = ipt_register_table(&packet_mangler);
+	ret = ipt_register_table(packet_mangler);
 	if (ret < 0)
 		return ret;
 
@@ -261,19 +270,117 @@ static int __init init(void)
  cleanup_hook0:
 	nf_unregister_hook(&ipt_ops[0]);
  cleanup_table:
-	ipt_unregister_table(&packet_mangler);
+	ipt_unregister_table(packet_mangler);
 
 	return ret;
 }
 
-static void __exit fini(void)
+static void mangle_fini(struct ipt_table *packet_mangler, struct nf_hook_ops ipt_ops[])
 {
 	unsigned int i;
 
-	for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
+	for (i = 0; i < 5; i++)
 		nf_unregister_hook(&ipt_ops[i]);
 
-	ipt_unregister_table(&packet_mangler);
+	ipt_unregister_table(packet_mangler);
+}
+
+int init_iptable_mangle(void)
+{
+#ifdef CONFIG_VE_IPTABLES
+	struct ve_struct *envid;
+	struct ipt_table *table;
+	struct nf_hook_ops *hooks;
+	int err;
+
+	envid = get_exec_env();
+	if (ve_is_super(envid)) {
+		table = &packet_mangler;
+		hooks = ipt_ops;
+	} else {
+		__module_get(THIS_MODULE);
+		err = -ENOMEM;
+		table = kmalloc(sizeof(packet_mangler), GFP_KERNEL);
+		if (table == NULL)
+			goto nomem_1;
+		hooks = kmalloc(sizeof(ipt_ops), GFP_KERNEL);
+		if (hooks == NULL)
+			goto nomem_2;
+
+		memcpy(table, &packet_mangler, sizeof(packet_mangler));
+		memcpy(hooks, ipt_ops, sizeof(ipt_ops));
+	}
+	envid->_ipt_mangle_hooks = hooks;
+	envid->_ipt_mangle_table = table;
+
+	err = mangle_init(table, hooks);
+	if (err)
+		goto err_minit;
+
+	return 0;
+
+err_minit:
+	envid->_ipt_mangle_table = NULL;
+	envid->_ipt_mangle_hooks = NULL;
+	if (!ve_is_super(envid))
+		kfree(hooks);
+nomem_2:
+	if (!ve_is_super(envid)) {
+		kfree(table);
+nomem_1:
+		module_put(THIS_MODULE);
+	}
+	return err;
+#else
+	return mangle_init(&packet_mangler, ipt_ops);
+#endif
+}
+
+void fini_iptable_mangle(void)
+{
+#ifdef CONFIG_VE_IPTABLES
+	struct ve_struct *envid;
+	struct ipt_table *table;
+	struct nf_hook_ops *hooks;
+
+	envid = get_exec_env();
+	table = envid->_ipt_mangle_table;
+	hooks = envid->_ipt_mangle_hooks;
+	if (table == NULL)
+		return;
+	mangle_fini(table, hooks);
+	envid->_ipt_mangle_table = NULL;
+	envid->_ipt_mangle_hooks = NULL;
+	if (!ve_is_super(envid)) {
+		kfree(hooks);
+		kfree(table);
+		module_put(THIS_MODULE);
+	}
+#else
+	mangle_fini(&packet_mangler, ipt_ops);
+#endif
+}
+
+static int __init init(void)
+{
+	int err;
+
+	err = init_iptable_mangle();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_mangle);
+	KSYMRESOLVE(fini_iptable_mangle);
+	KSYMMODRESOLVE(iptable_mangle);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(iptable_mangle);
+	KSYMUNRESOLVE(init_iptable_mangle);
+	KSYMUNRESOLVE(fini_iptable_mangle);
+	fini_iptable_mangle();
 }
 
 module_init(init);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/proc.c linux-2.6.9-ve023stab054/net/ipv4/proc.c
--- linux-2.6.9-100.orig/net/ipv4/proc.c	2004-10-19 01:53:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/proc.c	2011-06-15 19:26:20.000000000 +0400
@@ -62,6 +62,9 @@ static int sockstat_seq_show(struct seq_
 	/* From net/socket.c */
 	extern void socket_seq_show(struct seq_file *seq);
 
+	if (!ve_is_super(get_exec_env()))
+		return 0;
+
 	socket_seq_show(seq);
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
 		   fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
@@ -262,11 +265,12 @@ static int snmp_seq_show(struct seq_file
 		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
 
 	seq_printf(seq, "\nIp: %d %d",
-			ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl);
+			ve_ipv4_devconf.forwarding ? 1 : 2,
+			sysctl_ip_default_ttl);
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) ip_statistics, 
+			   fold_field((void **) ve_ip_statistics, 
 				      snmp4_ipstats_list[i].entry));
 
 	seq_puts(seq, "\nIcmp:");
@@ -276,7 +280,7 @@ static int snmp_seq_show(struct seq_file
 	seq_puts(seq, "\nIcmp:");
 	for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) icmp_statistics, 
+			   fold_field((void **) ve_icmp_statistics, 
 				      snmp4_icmp_list[i].entry));
 
 	seq_puts(seq, "\nTcp:");
@@ -288,11 +292,11 @@ static int snmp_seq_show(struct seq_file
 		/* MaxConn field is signed, RFC 2012 */
 		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
 			seq_printf(seq, " %ld",
-				   fold_field((void **) tcp_statistics, 
+				   fold_field((void **) ve_tcp_statistics, 
 					      snmp4_tcp_list[i].entry));
 		else
 			seq_printf(seq, " %lu",
-				   fold_field((void **) tcp_statistics,
+				   fold_field((void **) ve_tcp_statistics,
 					      snmp4_tcp_list[i].entry));
 	}
 
@@ -303,7 +307,7 @@ static int snmp_seq_show(struct seq_file
 	seq_puts(seq, "\nUdp:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) udp_statistics, 
+			   fold_field((void **) ve_udp_statistics, 
 				      snmp4_udp_list[i].entry));
 
 	seq_putc(seq, '\n');
@@ -337,7 +341,7 @@ static int netstat_seq_show(struct seq_f
 	seq_puts(seq, "\nTcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) net_statistics, 
+			   fold_field((void **) ve_net_statistics, 
 				      snmp4_net_list[i].entry));
 
 	seq_putc(seq, '\n');
diff -Nurap linux-2.6.9-100.orig/net/ipv4/raw.c linux-2.6.9-ve023stab054/net/ipv4/raw.c
--- linux-2.6.9-100.orig/net/ipv4/raw.c	2011-06-09 19:22:55.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/raw.c	2011-06-15 19:26:19.000000000 +0400
@@ -114,7 +114,8 @@ struct sock *__raw_v4_lookup(struct sock
 		if (inet->num == num 					&&
 		    !(inet->daddr && inet->daddr != raddr) 		&&
 		    !(inet->rcv_saddr && inet->rcv_saddr != laddr)	&&
-		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) &&
+		    ve_accessible_strict(VE_OWNER_SK(sk), get_exec_env()))
 			goto found; /* gotcha */
 	}
 	sk = NULL;
@@ -741,8 +742,12 @@ static struct sock *raw_get_first(struct
 		struct hlist_node *node;
 
 		sk_for_each(sk, node, &raw_v4_htable[state->bucket])
-			if (sk->sk_family == PF_INET)
+			if (sk->sk_family == PF_INET) {
+				if (!ve_accessible(VE_OWNER_SK(sk),
+							get_exec_env()))
+					continue;
 				goto found;
+			}
 	}
 	sk = NULL;
 found:
@@ -756,8 +761,14 @@ static struct sock *raw_get_next(struct 
 	do {
 		sk = sk_next(sk);
 try_again:
-		;
-	} while (sk && sk->sk_family != PF_INET);
+		if (!sk)
+			break;
+		if (sk->sk_family != PF_INET)
+			continue;
+		if (ve_accessible(VE_OWNER_SK(sk),
+					get_exec_env()))
+			break;
+	} while (1);
 
 	if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
 		sk = sk_head(&raw_v4_htable[state->bucket]);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/route.c linux-2.6.9-ve023stab054/net/ipv4/route.c
--- linux-2.6.9-100.orig/net/ipv4/route.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/route.c	2011-06-15 19:26:19.000000000 +0400
@@ -92,6 +92,7 @@
 #include <linux/jhash.h>
 #include <linux/rcupdate.h>
 #include <linux/times.h>
+#include <linux/kmem_cache.h>
 #include <net/protocol.h>
 #include <net/ip.h>
 #include <net/route.h>
@@ -111,6 +112,8 @@
 
 #define RT_GC_TIMEOUT (300*HZ)
 
+int ip_rt_src_check		= 1;
+
 int ip_rt_min_delay		= 2 * HZ;
 int ip_rt_max_delay		= 10 * HZ;
 int ip_rt_max_size;
@@ -251,11 +254,28 @@ static unsigned int rt_hash_code(u32 dad
 		& rt_hash_mask);
 }
 
+void prepare_rt_cache(void)
+{
+#ifdef CONFIG_VE
+	struct rtable *r;
+	int i;
+
+	for (i = rt_hash_mask; i >= 0; i--) {
+		spin_lock_bh(rt_hash_lock_addr(i));
+		for (r = rt_hash_table[i].chain; r; r = r->u.rt_next) {
+			r->fl.owner_env = get_ve0();
+		}
+		spin_unlock_bh(rt_hash_lock_addr(i));
+        }
+#endif
+}
+
 #ifdef CONFIG_PROC_FS
 struct rt_cache_iter_state {
 	int bucket;
 };
 
+static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r);
 static struct rtable *rt_cache_get_first(struct seq_file *seq)
 {
 	struct rtable *r = NULL;
@@ -268,6 +288,8 @@ static struct rtable *rt_cache_get_first
 			break;
 		rcu_read_unlock_bh();
 	}
+	if (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()))
+		r = rt_cache_get_next(seq, r);
 	return r;
 }
 
@@ -275,6 +297,7 @@ static struct rtable *rt_cache_get_next(
 {
 	struct rt_cache_iter_state *st = rcu_dereference(seq->private);
 
+loop:
 	r = r->u.rt_next;
 	while (!r) {
 		rcu_read_unlock_bh();
@@ -283,6 +306,8 @@ static struct rtable *rt_cache_get_next(
 		rcu_read_lock_bh();
 		r = rt_hash_table[st->bucket].chain;
 	}
+	if (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()))
+		goto loop;
 	return r;
 }
 
@@ -592,26 +617,106 @@ static void rt_check_expire(unsigned lon
 	mod_timer(&rt_periodic_timer, now + ip_rt_gc_interval);
 }
 
+typedef unsigned long rt_flush_gen_t;
+
+#ifdef CONFIG_VE
+
+static rt_flush_gen_t rt_flush_gen;
+
+/* called under rt_flush_lock */
+static void set_rt_flush_required(struct ve_struct *env)
+{
+	/*
+	 * If the global generation rt_flush_gen is equal to G, then
+	 * the pass considering entries labelled by G is yet to come.
+	 */
+	env->rt_flush_required = rt_flush_gen;
+}
+
+static spinlock_t rt_flush_lock;
+static rt_flush_gen_t reset_rt_flush_required(void)
+{
+	rt_flush_gen_t g;
+
+	spin_lock_bh(&rt_flush_lock);
+	g = rt_flush_gen++;
+	spin_unlock_bh(&rt_flush_lock);
+	return g;
+}
+
+static int check_rt_flush_required(struct ve_struct *env, rt_flush_gen_t gen)
+{
+	/* can be checked without the lock */
+	return env->rt_flush_required >= gen;
+}
+
+#else
+
+static void set_rt_flush_required(struct ve_struct *env)
+{
+}
+
+static rt_flush_gen_t reset_rt_flush_required(void)
+{
+	return 0;
+}
+
+#endif
+
 /* This can run from both BH and non-BH contexts, the latter
  * in the case of a forced flush event.
  */
 static void rt_run_flush(unsigned long dummy)
 {
 	int i;
-	struct rtable *rth, *next;
+	struct rtable * rth, * next;
+	struct rtable * tail;
+	rt_flush_gen_t gen;
 
 	rt_deadline = 0;
 
 	get_random_bytes(&rt_hash_rnd, 4);
 
+	gen = reset_rt_flush_required();
+
 	for (i = rt_hash_mask; i >= 0; i--) {
+#ifdef CONFIG_VE
+		struct rtable ** prev, * p;
+
 		spin_lock_bh(rt_hash_lock_addr(i));
 		rth = rt_hash_table[i].chain;
+
+		/* defer releasing the head of the list after spin_unlock */
+		for (tail = rth; tail; tail = tail->u.rt_next)
+			if (!check_rt_flush_required(tail->fl.owner_env, gen))
+				break;
+		if (rth != tail)
+			rt_hash_table[i].chain = tail;
+
+		/* call rt_free on entries after the tail requiring flush */
+		prev = &rt_hash_table[i].chain;
+		for (p = *prev; p; p = next) {
+			next = p->u.rt_next;
+			if (!check_rt_flush_required(p->fl.owner_env, gen)) {
+				prev = &p->u.rt_next;
+			} else {
+				*prev = next;
+				rt_free(p);
+			}
+		}
+
+#else
+		spin_lock_bh(rt_hash_lock_addr(i));
+		rth = rt_hash_table[i].chain;
+
 		if (rth)
 			rt_hash_table[i].chain = NULL;
+		tail = NULL;
+
+#endif
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
-		for (; rth; rth = next) {
+		for (; rth != tail; rth = next) {
 			next = rth->u.rt_next;
 			rt_free(rth);
 		}
@@ -647,6 +752,8 @@ void rt_cache_flush(int delay)
 			delay = tmo;
 	}
 
+	set_rt_flush_required(get_exec_env());
+
 	if (delay <= 0) {
 		spin_unlock_bh(&rt_flush_lock);
 		rt_run_flush(0);
@@ -662,9 +769,30 @@ void rt_cache_flush(int delay)
 
 static void rt_secret_rebuild(unsigned long dummy)
 {
+	int i;
+	struct rtable *rth, *next;
 	unsigned long now = jiffies;
 
-	rt_cache_flush(0);
+	spin_lock_bh(&rt_flush_lock);
+	del_timer(&rt_flush_timer);
+	spin_unlock_bh(&rt_flush_lock);
+
+	rt_deadline = 0;
+	get_random_bytes(&rt_hash_rnd, 4);
+
+	for (i = rt_hash_mask; i >= 0; i--) {
+		spin_lock_bh(rt_hash_lock_addr(i));
+		rth = rt_hash_table[i].chain;
+		if (rth)
+			rt_hash_table[i].chain = NULL;
+		spin_unlock_bh(rt_hash_lock_addr(i));
+
+		for (; rth; rth = next) {
+			next = rth->u.rt_next;
+			rt_free(rth);
+		}
+	}
+
 	mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
 }
 
@@ -806,7 +934,8 @@ static inline int compare_keys(struct fl
 {
 	return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 &&
 	       fl1->oif     == fl2->oif &&
-	       fl1->iif     == fl2->iif;
+	       fl1->iif     == fl2->iif &&
+	       ve_accessible_strict(fl1->owner_env, fl2->owner_env);
 }
 
 static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
@@ -890,6 +1019,8 @@ restart:
 	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
 		int err = arp_bind_neighbour(&rt->u.dst);
 		if (err) {
+			struct dst_entry *dst;
+
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
 			if (err != -ENOBUFS) {
@@ -912,8 +1043,14 @@ restart:
 				goto restart;
 			}
 
-			if (net_ratelimit())
-				printk(KERN_WARNING "Neighbour table overflow.\n");
+			if (net_ratelimit()) {
+				dst = &rt->u.dst;
+				printk(KERN_WARNING "Neighbour table overflow "
+						"(env %u, dev %s, hop %08x)\n",
+					VEID(get_exec_env()),
+					dst->dev ? dst->dev->name : "(no)",
+					((struct rtable *)dst)->rt_gateway);
+			}
 			rt_drop(rt);
 			return -ENOBUFS;
 		}
@@ -1019,7 +1156,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
 	u32  skeys[2] = { saddr, 0 };
 	int  ikeys[2] = { dev->ifindex, 0 };
 	struct netevent_redirect netevent;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	tos &= IPTOS_RT_MASK;
 
 	if (!in_dev)
@@ -1055,6 +1194,10 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
 				    rth->fl.fl4_src != skeys[i] ||
 				    rth->fl.fl4_tos != tos ||
 				    rth->fl.oif != ikeys[k] ||
+#ifdef CONFIG_VE
+				    !ve_accessible_strict(rth->fl.owner_env,
+					    		  ve) ||
+#endif
 				    rth->fl.iif != 0) {
 					rthp = &rth->u.rt_next;
 					continue;
@@ -1093,6 +1236,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
 				rt->u.dst.neighbour	= NULL;
 				rt->u.dst.hh		= NULL;
 				rt->u.dst.xfrm		= NULL;
+#ifdef CONFIG_VE
+				rt->fl.owner_env = ve;
+#endif
 
 				rt->rt_flags		|= RTCF_REDIRECTED;
 
@@ -1541,6 +1687,9 @@ static int ip_route_input_mc(struct sk_b
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -1548,7 +1697,7 @@ static int ip_route_input_mc(struct sk_b
 #endif
 	rth->rt_iif	=
 	rth->fl.iif	= dev->ifindex;
-	rth->u.dst.dev	= &loopback_dev;
+	rth->u.dst.dev	= &visible_loopback_dev;
 	dev_hold(rth->u.dst.dev);
 	rth->idev	= in_dev_get(rth->u.dst.dev);
 	rth->fl.oif	= 0;
@@ -1658,7 +1807,7 @@ static int ip_route_input_slow(struct sk
 	if (res.type == RTN_LOCAL) {
 		int result;
 		result = fib_validate_source(saddr, daddr, tos,
-					     loopback_dev.ifindex,
+					     visible_loopback_dev.ifindex,
 					     dev, &spec_dst, &itag);
 		if (result < 0)
 			goto martian_source;
@@ -1722,6 +1871,9 @@ static int ip_route_input_slow(struct sk
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 	rth->rt_gateway	= daddr;
@@ -1785,6 +1937,9 @@ local_input:
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -1792,7 +1947,7 @@ local_input:
 #endif
 	rth->rt_iif	=
 	rth->fl.iif	= dev->ifindex;
-	rth->u.dst.dev	= &loopback_dev;
+	rth->u.dst.dev	= &visible_loopback_dev;
 	dev_hold(rth->u.dst.dev);
 	rth->idev	= in_dev_get(rth->u.dst.dev);
 	rth->rt_gateway	= daddr;
@@ -1885,6 +2040,9 @@ int ip_route_input(struct sk_buff *skb, 
 #ifdef CONFIG_IP_ROUTE_FWMARK
 		    rth->fl.fl4_fwmark == skb->nfmark &&
 #endif
+#ifdef CONFIG_VE
+		    rth->fl.owner_env == get_exec_env() &&
+#endif
 		    rth->fl.fl4_tos == tos) {
 			rth->u.dst.lastuse = jiffies;
 			dst_hold(&rth->u.dst);
@@ -1950,7 +2108,7 @@ static int ip_route_output_slow(struct r
 					.fwmark = oldflp->fl4_fwmark
 #endif
 				      } },
-			    .iif = loopback_dev.ifindex,
+			    .iif = visible_loopback_dev.ifindex,
 			    .oif = oldflp->oif };
 	struct fib_result res;
 	unsigned flags = 0;
@@ -1973,10 +2131,13 @@ static int ip_route_output_slow(struct r
 		    ZERONET(oldflp->fl4_src))
 			goto out;
 
-		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-		dev_out = ip_dev_find(oldflp->fl4_src);
-		if (dev_out == NULL)
-			goto out;
+		if (ip_rt_src_check) {
+			/* It is equivalent to
+			   inet_addr_type(saddr) == RTN_LOCAL */
+			dev_out = ip_dev_find(oldflp->fl4_src);
+			if (dev_out == NULL)
+				goto out;
+		}
 
 		/* I removed check for oif == dev_out->oif here.
 		   It was wrong for two reasons:
@@ -2003,6 +2164,12 @@ static int ip_route_output_slow(struct r
 			   Luckily, this hack is good workaround.
 			 */
 
+			if (dev_out == NULL) {
+				dev_out = ip_dev_find(oldflp->fl4_src);
+				if (dev_out == NULL)
+					goto out;
+			}
+
 			fl.oif = dev_out->ifindex;
 			goto make_route;
 		}
@@ -2042,9 +2209,9 @@ static int ip_route_output_slow(struct r
 			fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
 		if (dev_out)
 			dev_put(dev_out);
-		dev_out = &loopback_dev;
+		dev_out = &visible_loopback_dev;
 		dev_hold(dev_out);
-		fl.oif = loopback_dev.ifindex;
+		fl.oif = visible_loopback_dev.ifindex;
 		res.type = RTN_LOCAL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
@@ -2089,7 +2256,7 @@ static int ip_route_output_slow(struct r
 			fl.fl4_src = fl.fl4_dst;
 		if (dev_out)
 			dev_put(dev_out);
-		dev_out = &loopback_dev;
+		dev_out = &visible_loopback_dev;
 		dev_hold(dev_out);
 		fl.oif = dev_out->ifindex;
 		if (res.fi)
@@ -2171,6 +2338,9 @@ make_route:
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->rt_dst	= fl.fl4_dst;
 	rth->rt_src	= fl.fl4_src;
 	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
@@ -2246,6 +2416,7 @@ int __ip_route_output_key(struct rtable 
 #ifdef CONFIG_IP_ROUTE_FWMARK
 		    rth->fl.fl4_fwmark == flp->fl4_fwmark &&
 #endif
+		    ve_accessible_strict(rth->fl.owner_env, get_exec_env()) &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK))) {
 			rth->u.dst.lastuse = jiffies;
@@ -2355,7 +2526,7 @@ static int rt_fill_info(struct sk_buff *
 		u32 dst = rt->rt_dst;
 
 		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
-		    ipv4_devconf.mc_forwarding) {
+		    ve_ipv4_devconf.mc_forwarding) {
 			int err = ipmr_get_route(skb, r, nowait);
 			if (err <= 0) {
 				if (!nowait) {
@@ -2508,6 +2679,11 @@ void ip_rt_multicast_event(struct in_dev
 #ifdef CONFIG_SYSCTL
 static int flush_delay;
 
+void *get_flush_delay_addr(void)
+{
+	return &flush_delay;
+}
+
 static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
 					struct file *filp, void __user *buffer,
 					size_t *lenp, loff_t *ppos)
@@ -2521,6 +2697,13 @@ static int ipv4_sysctl_rtcache_flush(ctl
 	return -EINVAL;
 }
 
+int visible_ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+					struct file *filp, void __user *buffer,
+					size_t *lenp, loff_t *ppos)
+{
+	return ipv4_sysctl_rtcache_flush(ctl, write, filp, buffer, lenp, ppos);
+}
+
 static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
 						int __user *name,
 						int nlen,
@@ -2539,6 +2722,19 @@ static int ipv4_sysctl_rtcache_flush_str
 	return 0;
 }
 
+int visible_ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
+						int __user *name,
+						int nlen,
+						void __user *oldval,
+						size_t __user *oldlenp,
+						void __user *newval,
+						size_t newlen,
+						void **context)
+{
+	return ipv4_sysctl_rtcache_flush_strategy(table, name, nlen, oldval,
+			oldlenp, newval, newlen, context);
+}
+
 ctl_table ipv4_route_table[] = {
         {
 		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
@@ -2761,6 +2957,7 @@ __setup("rhash_entries=", set_rhash_entr
 int __init ip_rt_init(void)
 {
 	int rc = 0;
+	int scale;
 
 	rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
 			     (jiffies ^ (jiffies >> 7)));
@@ -2786,10 +2983,16 @@ int __init ip_rt_init(void)
 	if (!ipv4_dst_ops.kmem_cachep)
 		panic("IP: failed to allocate ip_dst_cache\n");
 
+	scale = num_physpages > 128 * 1024 ? 15 : 17;
+#if defined(CONFIG_DEBUG_SLAB) && defined(CONFIG_DEBUG_PAGEALLOC)
+	scale += long_log2(ipv4_dst_ops.kmem_cachep->objsize /
+			sizeof(struct rtable));
+#endif
+
 	rt_hash_table = net_alloc_hash("IP route cache",
 				       sizeof(struct rt_hash_bucket),
 				       rhash_entries,
-				       (num_physpages >= 128 * 1024) ? 15 : 17,
+				       scale,
 				       &rt_hash_log,
 				       &rt_hash_mask,
 				       0);
@@ -2838,7 +3041,7 @@ int __init ip_rt_init(void)
 	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
 	}
 #ifdef CONFIG_NET_CLS_ROUTE
-	create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
+	create_proc_read_entry("net/rt_acct", 0, NULL, ip_rt_acct_read, NULL);
 #endif
 #endif
 #ifdef CONFIG_XFRM
diff -Nurap linux-2.6.9-100.orig/net/ipv4/sysctl_net_ipv4.c linux-2.6.9-ve023stab054/net/ipv4/sysctl_net_ipv4.c
--- linux-2.6.9-100.orig/net/ipv4/sysctl_net_ipv4.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/sysctl_net_ipv4.c	2011-06-15 19:26:19.000000000 +0400
@@ -49,6 +49,8 @@ extern int inet_peer_maxttl;
 extern int inet_peer_gc_mintime;
 extern int inet_peer_gc_maxtime;
 
+int sysctl_tcp_use_sg = 1;
+
 #ifdef CONFIG_SYSCTL
 static int tcp_retr1_max = 255; 
 static int ip_local_port_range_min[] = { 1, 1 };
@@ -65,17 +67,23 @@ static
 int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
 			void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	int val = ipv4_devconf.forwarding;
+	int val = ve_ipv4_devconf.forwarding;
 	int ret;
 
 	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
-	if (write && ipv4_devconf.forwarding != val)
+	if (write && ve_ipv4_devconf.forwarding != val)
 		inet_forward_change();
 
 	return ret;
 }
 
+int visible_ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return ipv4_sysctl_forward(ctl, write, filp, buffer, lenp, ppos);
+}
+
 static int ipv4_sysctl_forward_strategy(ctl_table *table,
 			 int __user *name, int nlen,
 			 void __user *oldval, size_t __user *oldlenp,
@@ -118,6 +126,16 @@ static int ipv4_sysctl_forward_strategy(
 	return 1;
 }
 
+int visible_ipv4_sysctl_forward_strategy(ctl_table *table,
+			 int __user *name, int nlen,
+			 void __user *oldval, size_t __user *oldlenp,
+			 void __user *newval, size_t newlen, 
+			 void **context)
+{
+	return ipv4_sysctl_forward_strategy(table, name, nlen,
+			oldval, oldlenp, newval, newlen, context);
+}
+
 ctl_table ipv4_table[] = {
         {
 		.ctl_name	= NET_IPV4_TCP_TIMESTAMPS,
@@ -226,6 +244,22 @@ ctl_table ipv4_table[] = {
 		.proc_handler	= &proc_dointvec
 	},
 	{
+		.ctl_name	= NET_TCP_MAX_TW_KMEM_FRACTION,
+		.procname	= "tcp_max_tw_kmem_fraction",
+		.data		= &sysctl_tcp_max_tw_kmem_fraction,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_MAX_TW_BUCKETS_VE,
+		.procname	= "tcp_max_tw_buckets_ve",
+		.data		= &sysctl_tcp_max_tw_buckets_ve,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
 		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
 		.procname	= "ipfrag_high_thresh",
 		.data		= &sysctl_ipfrag_high_thresh,
@@ -715,6 +749,14 @@ ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= NET_TCP_USE_SG,
+		.procname	= "tcp_use_sg",
+		.data		= &sysctl_tcp_use_sg,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{ .ctl_name = 0 }
 };
 
diff -Nurap linux-2.6.9-100.orig/net/ipv4/tcp.c linux-2.6.9-ve023stab054/net/ipv4/tcp.c
--- linux-2.6.9-100.orig/net/ipv4/tcp.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/tcp.c	2011-06-15 19:26:21.000000000 +0400
@@ -248,6 +248,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/kmem_cache.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
@@ -262,6 +263,9 @@
 #include <net/xfrm.h>
 #include <net/ip.h>
 
+#include <ub/ub_orphan.h>
+#include <ub/ub_net.h>
+#include <ub/ub_tcp.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -275,6 +279,7 @@ kmem_cache_t *tcp_bucket_cachep;
 kmem_cache_t *tcp_timewait_cachep;
 
 atomic_t tcp_orphan_count = ATOMIC_INIT(0);
+EXPORT_SYMBOL(tcp_orphan_count);
 
 int sysctl_tcp_mem[3];
 int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
@@ -331,6 +336,7 @@ unsigned int tcp_poll(struct file *file,
 	unsigned int mask;
 	struct sock *sk = sock->sk;
 	struct tcp_opt *tp = tcp_sk(sk);
+	int check_send_space;
 
 	sock_poll_wait(file, sk->sk_sleep, wait);
 	if (sk->sk_state == TCP_LISTEN)
@@ -345,6 +351,21 @@ unsigned int tcp_poll(struct file *file,
 	if (sk->sk_err)
 		mask = POLLERR;
 
+	check_send_space = 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (!(sk->sk_shutdown & SEND_SHUTDOWN) && sock_has_ubc(sk)) {
+		unsigned long size;
+		size = MAX_TCP_HEADER + tp->mss_cache;
+		if (size > SOCK_MIN_UBCSPACE)
+			size = SOCK_MIN_UBCSPACE;
+		size = skb_charge_size(size);   
+		if (ub_sock_makewres_tcp(sk, size)) {
+			check_send_space = 0;
+			ub_sock_sndqueueadd_tcp(sk, size);
+		}
+	}
+#endif
+
 	/*
 	 * POLLHUP is certainly not done right. But poll() doesn't
 	 * have a notion of HUP in just one direction, and for a
@@ -388,7 +409,7 @@ unsigned int tcp_poll(struct file *file,
 		     sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data))
 			mask |= POLLIN | POLLRDNORM;
 
-		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+		if (check_send_space && !(sk->sk_shutdown & SEND_SHUTDOWN)) {
 			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
 				mask |= POLLOUT | POLLWRNORM;
 			} else {  /* send SIGIO later */
@@ -564,7 +585,7 @@ static void tcp_listen_stop (struct sock
 
 		sock_orphan(child);
 
-		atomic_inc(&tcp_orphan_count);
+		tcp_inc_orphan_count(child);
 
 		tcp_destroy_sock(child);
 
@@ -657,16 +678,23 @@ static ssize_t do_tcp_sendpages(struct s
 		int copy, i, can_coalesce;
 		int offset = poffset % PAGE_SIZE;
 		int size = min_t(size_t, psize, PAGE_SIZE - offset);
+		unsigned long chargesize = 0;
 
 		if (!sk->sk_send_head || (copy = mss_now - skb->len) <= 0) {
 new_segment:
+			chargesize = 0;
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
+			chargesize = skb_charge_size(MAX_TCP_HEADER +
+						     tp->mss_cache);
+			if (ub_sock_getwres_tcp(sk, chargesize) < 0)
+				goto wait_for_ubspace;
 			skb = sk_stream_alloc_pskb(sk, 0, 0,
 						   sk->sk_allocation);
 			if (!skb)
 				goto wait_for_memory;
+			ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
 
 			skb_entail(sk, tp, skb);
 			copy = mss_now;
@@ -723,10 +751,15 @@ new_segment:
 wait_for_sndbuf:
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
+		ub_sock_retwres_tcp(sk, chargesize,
+			skb_charge_size(MAX_TCP_HEADER + tp->mss_cache));
+		chargesize = 0;
+wait_for_ubspace:
 		if (copied)
 			tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
-		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+		err = __sk_stream_wait_memory(sk, &timeo, chargesize);
+		if (err != 0)
 			goto do_error;
 
 		mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
@@ -766,9 +799,6 @@ ssize_t tcp_sendpage(struct socket *sock
 	return res;
 }
 
-#define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
-#define TCP_OFF(sk)	(sk->sk_sndmsg_off)
-
 static inline int select_size(struct sock *sk, struct tcp_opt *tp)
 {
 	int tmp = tp->mss_cache_std;
@@ -822,6 +852,7 @@ int tcp_sendmsg(struct kiocb *iocb, stru
 	while (--iovlen >= 0) {
 		int seglen = iov->iov_len;
 		unsigned char __user *from = iov->iov_base;
+		unsigned long chargesize = 0;
 
 		iov++;
 
@@ -832,18 +863,26 @@ int tcp_sendmsg(struct kiocb *iocb, stru
 
 			if (!sk->sk_send_head ||
 			    (copy = mss_now - skb->len) <= 0) {
+				unsigned long size;
 
 new_segment:
 				/* Allocate new segment. If the interface is SG,
 				 * allocate skb fitting to single page.
 				 */
+				chargesize = 0;
 				if (!sk_stream_memory_free(sk))
 					goto wait_for_sndbuf;
-
-				skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
-							   0, sk->sk_allocation);
+				size = select_size(sk, tp);
+				chargesize = skb_charge_size(MAX_TCP_HEADER +
+							     size);
+				if (ub_sock_getwres_tcp(sk, chargesize) < 0)
+					goto wait_for_ubspace;
+				skb = sk_stream_alloc_pskb(sk, size, 0,
+							   sk->sk_allocation);
 				if (!skb)
 					goto wait_for_memory;
+				ub_skb_set_charge(skb, sk, chargesize,
+						  UB_TCPSNDBUF);
 
 				/*
 				 * Check whether we can use HW checksum.
@@ -896,11 +935,15 @@ new_segment:
 					      ~(L1_CACHE_BYTES - 1);
 					if (off == PAGE_SIZE) {
 						put_page(page);
+						ub_sock_tcp_detachpage(sk);
 						TCP_PAGE(sk) = page = NULL;
 					}
 				}
 
 				if (!page) {
+					chargesize = PAGE_SIZE;
+					if (ub_sock_tcp_chargepage(sk) < 0)
+						goto wait_for_ubspace;
 					/* Allocate new cache page. */
 					if (!(page = sk_stream_alloc_page(sk)))
 						goto wait_for_memory;
@@ -936,7 +979,8 @@ new_segment:
 					} else if (off + copy < PAGE_SIZE) {
 						get_page(page);
 						TCP_PAGE(sk) = page;
-					}
+					} else
+						ub_sock_tcp_detachpage(sk);
 				}
 
 				TCP_OFF(sk) = off + copy;
@@ -967,10 +1011,15 @@ new_segment:
 wait_for_sndbuf:
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
+			ub_sock_retwres_tcp(sk, chargesize,
+				skb_charge_size(MAX_TCP_HEADER+tp->mss_cache));
+			chargesize = 0;
+wait_for_ubspace:
 			if (copied)
 				tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
-			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+			err = __sk_stream_wait_memory(sk, &timeo, chargesize);
+			if (err != 0)
 				goto do_error;
 
 			mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
@@ -1067,7 +1116,18 @@ void cleanup_rbuf(struct sock *sk, int c
 #if TCP_DEBUG
 	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 
-	BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
+	if (!(skb==NULL || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq))) {
+		printk("KERNEL: assertion: skb==NULL || "
+				"before(tp->copied_seq, skb->end_seq)\n");
+		printk("VE%u pid %d comm %.16s\n", 
+				(get_exec_env() ? VEID(get_exec_env()) : 0),
+				current->pid, current->comm);
+		printk("copied=%d, copied_seq=%d, rcv_nxt=%d\n", copied,
+				tp->copied_seq, tp->rcv_nxt);
+		printk("skb->len=%d, skb->seq=%d, skb->end_seq=%d\n",
+				skb->len, TCP_SKB_CB(skb)->seq, 
+				TCP_SKB_CB(skb)->end_seq);
+	}
 #endif
 
 	if (tcp_ack_scheduled(tp)) {
@@ -1293,7 +1353,22 @@ int tcp_recvmsg(struct kiocb *iocb, stru
 				goto found_ok_skb;
 			if (skb->h.th->fin)
 				goto found_fin_ok;
-			BUG_TRAP(flags & MSG_PEEK);
+			if (!(flags & MSG_PEEK)) {
+				printk("KERNEL: assertion: flags&MSG_PEEK\n");
+				printk("VE%u pid %d comm %.16s\n", 
+						(get_exec_env() ? 
+						 VEID(get_exec_env()) : 0),
+						current->pid, current->comm);
+				printk("flags=0x%x, len=%d, copied_seq=%d, "
+						"rcv_nxt=%d\n", flags, len,
+						tp->copied_seq, tp->rcv_nxt);
+				printk("skb->len=%d, *seq=%d, skb->seq=%d, "
+						"skb->end_seq=%d, offset=%d\n",
+						skb->len, *seq, 
+						TCP_SKB_CB(skb)->seq,
+						TCP_SKB_CB(skb)->end_seq, 
+						offset);
+			}
 			skb = skb->next;
 		} while (skb != (struct sk_buff *)&sk->sk_receive_queue);
 
@@ -1356,8 +1431,18 @@ int tcp_recvmsg(struct kiocb *iocb, stru
 
 			tp->ucopy.len = len;
 
-			BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
-				 (flags & (MSG_PEEK | MSG_TRUNC)));
+			if (!(tp->copied_seq == tp->rcv_nxt || 
+						(flags&(MSG_PEEK|MSG_TRUNC)))) {
+				printk("KERNEL: assertion: tp->copied_seq == "
+						"tp->rcv_nxt || ...\n");
+				printk("VE%u pid %d comm %.16s\n", 
+						(get_exec_env() ?
+						 VEID(get_exec_env()) : 0),
+						current->pid, current->comm);
+				printk("flags=0x%x, len=%d, copied_seq=%d, "
+						"rcv_nxt=%d\n", flags, len,
+						tp->copied_seq, tp->rcv_nxt);
+			}
 
 			/* Ugly... If prequeue is not empty, we have to
 			 * process it before releasing socket, otherwise
@@ -1618,7 +1703,7 @@ void tcp_destroy_sock(struct sock *sk)
 	}
 #endif
 
-	atomic_dec(&tcp_orphan_count);
+	tcp_dec_orphan_count(sk);
 	sock_put(sk);
 }
 
@@ -1742,26 +1827,33 @@ adjudge_to_death:
 			if (tmo > TCP_TIMEWAIT_LEN) {
 				tcp_reset_keepalive_timer(sk, tcp_fin_time(tp));
 			} else {
-				atomic_inc(&tcp_orphan_count);
+				tcp_inc_orphan_count(sk);
 				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
 				goto out;
 			}
 		}
 	}
 	if (sk->sk_state != TCP_CLOSE) {
+		int orphans = tcp_get_orphan_count(sk);
+
 		sk_stream_mem_reclaim(sk);
-		if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
-		    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-		     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
-			if (net_ratelimit())
+		if (tcp_too_many_orphans(sk, orphans)) {
+			if (net_ratelimit()) {
+				int ubid = 0;
+
+#ifdef CONFIG_USER_RESOURCE
+				ubid = sock_has_ubc(sk) ?
+				   top_beancounter(sock_bc(sk)->ub)->ub_uid : 0;
+#endif
 				printk(KERN_INFO "TCP: too many of orphaned "
-				       "sockets\n");
+				       "sockets (%d in VE%d)\n", orphans, ubid);
+			}
 			tcp_set_state(sk, TCP_CLOSE);
 			tcp_send_active_reset(sk, GFP_ATOMIC);
 			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
 		}
 	}
-	atomic_inc(&tcp_orphan_count);
+	tcp_inc_orphan_count(sk);
 
 	if (sk->sk_state == TCP_CLOSE)
 		tcp_destroy_sock(sk);
@@ -1827,12 +1919,13 @@ int tcp_disconnect(struct sock *sk, int 
 	tcp_set_pcount(&tp->packets_out, 0);
 	tp->snd_ssthresh = 0x7fffffff;
 	tp->snd_cwnd_cnt = 0;
+	tp->advmss = 65535;
 	tcp_set_ca_state(tp, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	tcp_delack_init(tp);
 	sk->sk_send_head = NULL;
-	tp->saw_tstamp = 0;
-	tcp_sack_reset(tp);
+	tp->rx_opt.saw_tstamp = 0;
+	tcp_sack_reset(&tp->rx_opt);
 	__sk_dst_reset(sk);
 
 	BUG_TRAP(!inet->num || tp->bind_hash);
@@ -1971,7 +2064,7 @@ int tcp_setsockopt(struct sock *sk, int 
 			err = -EINVAL;
 			break;
 		}
-		tp->user_mss = val;
+		tp->rx_opt.user_mss = val;
 		break;
 
 	case TCP_NODELAY:
@@ -2129,7 +2222,7 @@ int tcp_getsockopt(struct sock *sk, int 
 	case TCP_MAXSEG:
 		val = tp->mss_cache_std;
 		if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
-			val = tp->user_mss;
+			val = tp->rx_opt.user_mss;
 		break;
 	case TCP_NODELAY:
 		val = !!(tp->nonagle&TCP_NAGLE_OFF);
@@ -2193,6 +2286,7 @@ int tcp_getsockopt(struct sock *sk, int 
 
 extern void __skb_cb_too_small_for_tcp(int, int);
 extern void tcpdiag_init(void);
+extern unsigned int nr_free_lowpages(void);
 
 static __initdata unsigned long thash_entries;
 static int __init set_thash_entries(char *str)
@@ -2216,24 +2310,26 @@ void __init tcp_init(void)
 
 	tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
 						   sizeof(struct open_request),
-					       0, SLAB_HWCACHE_ALIGN,
+					       0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 					       NULL, NULL);
 	if (!tcp_openreq_cachep)
 		panic("tcp_init: Cannot alloc open_request cache.");
 
 	tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
 					      sizeof(struct tcp_bind_bucket),
-					      0, SLAB_HWCACHE_ALIGN,
+					      0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 					      NULL, NULL);
 	if (!tcp_bucket_cachep)
 		panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
 
 	tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket",
 						sizeof(struct tcp_tw_bucket),
-						0, SLAB_HWCACHE_ALIGN,
+						0,
+						SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						NULL, NULL);
 	if (!tcp_timewait_cachep)
 		panic("tcp_init: Cannot alloc tcp_tw_bucket cache.");
+	tcp_timewait_cachep->flags |= CFLGS_ENVIDS;
 
 	/* Size and allocate the main established and bind bucket
 	 * hash tables.
@@ -2290,10 +2386,19 @@ void __init tcp_init(void)
 	}
 	tcp_port_rover = sysctl_local_port_range[0] - 1;
 
+	goal = nr_free_lowpages() / 6;
+	while (order >= 3 && (1536<<order) > goal)
+		order--;
+
 	sysctl_tcp_mem[0] =  768 << order;
 	sysctl_tcp_mem[1] = 1024 << order;
 	sysctl_tcp_mem[2] = 1536 << order;
 
+	if (sysctl_tcp_mem[2] - sysctl_tcp_mem[1] > 4096)
+		sysctl_tcp_mem[1] = sysctl_tcp_mem[2] - 4096;
+	if (sysctl_tcp_mem[1] - sysctl_tcp_mem[0] > 4096)
+		sysctl_tcp_mem[0] = sysctl_tcp_mem[1] - 4096;
+
 	if (order < 3) {
 		sysctl_tcp_wmem[2] = 64 * 1024;
 		sysctl_tcp_rmem[0] = PAGE_SIZE;
diff -Nurap linux-2.6.9-100.orig/net/ipv4/tcp_diag.c linux-2.6.9-ve023stab054/net/ipv4/tcp_diag.c
--- linux-2.6.9-100.orig/net/ipv4/tcp_diag.c	2004-10-19 01:53:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/tcp_diag.c	2011-06-15 19:26:19.000000000 +0400
@@ -55,14 +55,14 @@ void tcp_get_info(struct sock *sk, struc
 	info->tcpi_probes = tp->probes_out;
 	info->tcpi_backoff = tp->backoff;
 
-	if (tp->tstamp_ok)
+	if (tp->rx_opt.tstamp_ok)
 		info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
-	if (tp->sack_ok)
+	if (tp->rx_opt.sack_ok)
 		info->tcpi_options |= TCPI_OPT_SACK;
-	if (tp->wscale_ok) {
+	if (tp->rx_opt.wscale_ok) {
 		info->tcpi_options |= TCPI_OPT_WSCALE;
-		info->tcpi_snd_wscale = tp->snd_wscale;
-		info->tcpi_rcv_wscale = tp->rcv_wscale;
+		info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
+		info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
 	} 
 
 	if (tp->ecn_flags&TCP_ECN_OK)
@@ -261,7 +261,7 @@ static int tcpdiag_get_exact(struct sk_b
 		return -EINVAL;
 	}
 
-	if (sk == NULL)
+	if (sk == NULL || !ve_accessible(VE_OWNER_SK(sk), get_exec_env()))
 		return -ENOENT;
 
 	err = -ESTALE;
@@ -473,6 +473,9 @@ static int tcpdiag_dump(struct sk_buff *
 	int s_i, s_num;
 	struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
 	struct rtattr *bc = NULL;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
 
 	if (cb->nlh->nlmsg_len > 4+NLMSG_SPACE(sizeof(struct tcpdiagreq)))
 		bc = (struct rtattr*)(r+1);
@@ -494,6 +497,9 @@ static int tcpdiag_dump(struct sk_buff *
 			num = 0;
 			sk_for_each(sk, node, &tcp_listening_hash[i]) {
 				struct inet_opt *inet = inet_sk(sk);
+
+				if (!ve_accessible(VE_OWNER_SK(sk), ve))
+					continue;
 				if (num < s_num)
 					goto next_listen;
 				if (!(r->tcpdiag_states&TCPF_LISTEN) ||
@@ -537,6 +543,8 @@ skip_listen_ht:
 		sk_for_each(sk, node, &head->chain) {
 			struct inet_opt *inet = inet_sk(sk);
 
+			if (!ve_accessible(VE_OWNER_SK(sk), ve))
+				continue;
 			if (num < s_num)
 				goto next_normal;
 			if (!(r->tcpdiag_states & (1 << sk->sk_state)))
@@ -562,7 +570,11 @@ next_normal:
 			sk_for_each(sk, node,
 				    &tcp_ehash[i + tcp_ehash_size].chain) {
 				struct inet_opt *inet = inet_sk(sk);
+				struct tcp_tw_bucket *tw;
 
+				tw = (struct tcp_tw_bucket*)sk;
+				if (!ve_accessible_veid(TW_VEID(tw), VEID(ve)))
+					continue;
 				if (num < s_num)
 					goto next_dying;
 				if (r->id.tcpdiag_sport != inet->sport &&
diff -Nurap linux-2.6.9-100.orig/net/ipv4/tcp_input.c linux-2.6.9-ve023stab054/net/ipv4/tcp_input.c
--- linux-2.6.9-100.orig/net/ipv4/tcp_input.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/tcp_input.c	2011-06-15 19:26:19.000000000 +0400
@@ -72,6 +72,8 @@
 #include <net/inet_common.h>
 #include <linux/ipsec.h>
 
+#include <ub/ub_tcp.h>
+
 int sysctl_tcp_timestamps = 1;
 int sysctl_tcp_window_scaling = 1;
 int sysctl_tcp_sack = 1;
@@ -119,9 +121,9 @@ int sysctl_tcp_bic_beta = 819;		/* = 819
 #define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
 #define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
 
-#define IsReno(tp) ((tp)->sack_ok == 0)
-#define IsFack(tp) ((tp)->sack_ok & 2)
-#define IsDSack(tp) ((tp)->sack_ok & 4)
+#define IsReno(tp) ((tp)->rx_opt.sack_ok == 0)
+#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)
+#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)
 
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
 
@@ -206,7 +208,7 @@ static __inline__ int tcp_in_quickack_mo
 
 static void tcp_fixup_sndbuf(struct sock *sk)
 {
-	int sndmem = tcp_sk(sk)->mss_clamp + MAX_TCP_HEADER + 16 +
+	int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
 		     sizeof(struct sk_buff);
 
 	if (sk->sk_sndbuf < 3 * sndmem)
@@ -262,7 +264,7 @@ tcp_grow_window(struct sock *sk, struct 
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
 	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
-	    !tcp_memory_pressure) {
+	    ub_tcp_rmem_allows_expand(sk)) {
 		int incr;
 
 		/* Check #2. Increase window, if skb with such overhead
@@ -331,6 +333,8 @@ static void tcp_init_buffer_space(struct
 
 	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
+
+	ub_tcp_update_maxadvmss(sk);
 }
 
 static void init_bictcp(struct tcp_opt *tp)
@@ -360,7 +364,7 @@ static void tcp_clamp_window(struct sock
 	if (ofo_win) {
 		if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
 		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-		    !tcp_memory_pressure &&
+		    !ub_tcp_memory_pressure(sk) &&
 		    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
 			sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
 					    sysctl_tcp_rmem[2]);
@@ -431,10 +435,10 @@ new_measure:
 
 static inline void tcp_rcv_rtt_measure_ts(struct tcp_opt *tp, struct sk_buff *skb)
 {
-	if (tp->rcv_tsecr &&
+	if (tp->rx_opt.rcv_tsecr &&
 	    (TCP_SKB_CB(skb)->end_seq -
 	     TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss))
-		tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_tsecr, 0);
+		tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
 }
 
 /*
@@ -835,7 +839,7 @@ static void tcp_init_metrics(struct sock
 	}
 	if (dst_metric(dst, RTAX_REORDERING) &&
 	    tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
-		tp->sack_ok &= ~2;
+		tp->rx_opt.sack_ok &= ~2;
 		tp->reordering = dst_metric(dst, RTAX_REORDERING);
 	}
 
@@ -869,7 +873,7 @@ static void tcp_init_metrics(struct sock
 	}
 	tcp_set_rto(tp);
 	tcp_bound_rto(tp);
-	if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp)
+	if (tp->rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
 		goto reset;
 	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -880,7 +884,7 @@ reset:
 	 * supported, TCP will fail to recalculate correct
 	 * rtt, if initial rto is too small. FORGET ALL AND RESET!
 	 */
-	if (!tp->saw_tstamp && tp->srtt) {
+	if (!tp->rx_opt.saw_tstamp && tp->srtt) {
 		tp->srtt = 0;
 		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
 		tp->rto = TCP_TIMEOUT_INIT;
@@ -903,14 +907,14 @@ static void tcp_update_reordering(struct
 			NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER);
 #if FASTRETRANS_DEBUG > 1
 		printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
-		       tp->sack_ok, tp->ca_state,
+		       tp->rx_opt.sack_ok, tp->ca_state,
 		       tp->reordering,
 		       tcp_get_pcount(&tp->fackets_out),
 		       tcp_get_pcount(&tp->sacked_out),
 		       tp->undo_marker ? tp->undo_retrans : 0);
 #endif
 		/* Disable FACK yet. */
-		tp->sack_ok &= ~2;
+		tp->rx_opt.sack_ok &= ~2;
 	}
 }
 
@@ -1000,13 +1004,13 @@ tcp_sacktag_write_queue(struct sock *sk,
 
 			if (before(start_seq, ack)) {
 				dup_sack = 1;
-				tp->sack_ok |= 4;
+				tp->rx_opt.sack_ok |= 4;
 				NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
 			} else if (num_sacks > 1 &&
 				   !after(end_seq, ntohl(sp[1].end_seq)) &&
 				   !before(start_seq, ntohl(sp[1].start_seq))) {
 				dup_sack = 1;
-				tp->sack_ok |= 4;
+				tp->rx_opt.sack_ok |= 4;
 				NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
 			}
 
@@ -1642,8 +1646,8 @@ static void tcp_cwnd_down(struct tcp_opt
 static __inline__ int tcp_packet_delayed(struct tcp_opt *tp)
 {
 	return !tp->retrans_stamp ||
-		(tp->saw_tstamp && tp->rcv_tsecr &&
-		 (__s32)(tp->rcv_tsecr - tp->retrans_stamp) < 0);
+		(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+		 (__s32)(tp->rx_opt.rcv_tsecr - tp->retrans_stamp) < 0);
 }
 
 /* Undo procedures. */
@@ -1997,7 +2001,7 @@ static void tcp_ack_saw_tstamp(struct so
 	 * in window is lost... Voila.	 			--ANK (010210)
 	 */
 	struct tcp_opt *tp = tcp_sk(sk);
-	seq_rtt = tcp_time_stamp - tp->rcv_tsecr;
+	seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
 	tcp_rtt_estimator(sk, seq_rtt);
 	tcp_set_rto(tp);
 	tp->backoff = 0;
@@ -2030,7 +2034,7 @@ tcp_ack_update_rtt(struct sock *sk, int 
 {
 	struct tcp_opt *tp = tcp_sk(sk);
 	/* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
-	if (tp->saw_tstamp && tp->rcv_tsecr)
+	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
 		tcp_ack_saw_tstamp(sk, flag);
 	else if (seq_rtt >= 0)
 		tcp_ack_no_tstamp(sk, seq_rtt, flag);
@@ -2508,7 +2512,7 @@ static int tcp_clean_rtx_queue(struct so
 	BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0);
 	BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0);
 	BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0);
-	if (!tcp_get_pcount(&tp->packets_out) && tp->sack_ok) {
+	if (!tcp_get_pcount(&tp->packets_out) && tp->rx_opt.sack_ok) {
 		if (tcp_get_pcount(&tp->lost_out)) {
 			printk(KERN_DEBUG "Leak l=%u %d\n",
 			       tcp_get_pcount(&tp->lost_out),
@@ -2587,7 +2591,7 @@ static int tcp_ack_update_window(struct 
 	u32 nwin = ntohs(skb->h.th->window);
 
 	if (likely(!skb->h.th->syn))
-		nwin <<= tp->snd_wscale;
+		nwin <<= tp->rx_opt.snd_wscale;
 
 	if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
 		flag |= FLAG_WIN_UPDATE;
@@ -2995,14 +2999,15 @@ uninteresting_ack:
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
  */
-void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp, int estab)
+void tcp_parse_options(struct sk_buff *skb,
+		struct tcp_options_received *opt_rx, int estab)
 {
 	unsigned char *ptr;
 	struct tcphdr *th = skb->h.th;
 	int length=(th->doff*4)-sizeof(struct tcphdr);
 
 	ptr = (unsigned char *)(th + 1);
-	tp->saw_tstamp = 0;
+	opt_rx->saw_tstamp = 0;
 
 	while(length>0) {
 	  	int opcode=*ptr++;
@@ -3025,41 +3030,41 @@ void tcp_parse_options(struct sk_buff *s
 					if(opsize==TCPOLEN_MSS && th->syn && !estab) {
 						u16 in_mss = ntohs(*(__u16 *)ptr);
 						if (in_mss) {
-							if (tp->user_mss && tp->user_mss < in_mss)
-								in_mss = tp->user_mss;
-							tp->mss_clamp = in_mss;
+							if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
+								in_mss = opt_rx->user_mss;
+							opt_rx->mss_clamp = in_mss;
 						}
 					}
 					break;
 				case TCPOPT_WINDOW:
 					if(opsize==TCPOLEN_WINDOW && th->syn && !estab)
 						if (sysctl_tcp_window_scaling) {
-							tp->wscale_ok = 1;
-							tp->snd_wscale = *(__u8 *)ptr;
-							if(tp->snd_wscale > 14) {
+							opt_rx->wscale_ok = 1;
+							opt_rx->snd_wscale = *(__u8 *)ptr;
+							if(opt_rx->snd_wscale > 14) {
 								if(net_ratelimit())
 									printk("tcp_parse_options: Illegal window "
 									       "scaling value %d >14 received.",
-									       tp->snd_wscale);
-								tp->snd_wscale = 14;
+									       opt_rx->snd_wscale);
+								opt_rx->snd_wscale = 14;
 							}
 						}
 					break;
 				case TCPOPT_TIMESTAMP:
 					if(opsize==TCPOLEN_TIMESTAMP) {
-						if ((estab && tp->tstamp_ok) ||
+						if ((estab && opt_rx->tstamp_ok) ||
 						    (!estab && sysctl_tcp_timestamps)) {
-							tp->saw_tstamp = 1;
-							tp->rcv_tsval = ntohl(*(__u32 *)ptr);
-							tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
+							opt_rx->saw_tstamp = 1;
+							opt_rx->rcv_tsval = ntohl(*(__u32 *)ptr);
+							opt_rx->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
 						}
 					}
 					break;
 				case TCPOPT_SACK_PERM:
 					if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
 						if (sysctl_tcp_sack) {
-							tp->sack_ok = 1;
-							tcp_sack_reset(tp);
+							opt_rx->sack_ok = 1;
+							tcp_sack_reset(opt_rx);
 						}
 					}
 					break;
@@ -3067,7 +3072,7 @@ void tcp_parse_options(struct sk_buff *s
 				case TCPOPT_SACK:
 					if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
 					   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
-					   tp->sack_ok) {
+					   opt_rx->sack_ok) {
 						TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
 					}
 	  			};
@@ -3083,36 +3088,36 @@ void tcp_parse_options(struct sk_buff *s
 static __inline__ int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, struct tcp_opt *tp)
 {
 	if (th->doff == sizeof(struct tcphdr)>>2) {
-		tp->saw_tstamp = 0;
+		tp->rx_opt.saw_tstamp = 0;
 		return 0;
-	} else if (tp->tstamp_ok &&
+	} else if (tp->rx_opt.tstamp_ok &&
 		   th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
 		__u32 *ptr = (__u32 *)(th + 1);
 		if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
 				  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
-			tp->saw_tstamp = 1;
+			tp->rx_opt.saw_tstamp = 1;
 			++ptr;
-			tp->rcv_tsval = ntohl(*ptr);
+			tp->rx_opt.rcv_tsval = ntohl(*ptr);
 			++ptr;
-			tp->rcv_tsecr = ntohl(*ptr);
+			tp->rx_opt.rcv_tsecr = ntohl(*ptr);
 			return 1;
 		}
 	}
-	tcp_parse_options(skb, tp, 1);
+	tcp_parse_options(skb, &tp->rx_opt, 1);
 	return 1;
 }
 
 static __inline__ void
 tcp_store_ts_recent(struct tcp_opt *tp)
 {
-	tp->ts_recent = tp->rcv_tsval;
-	tp->ts_recent_stamp = xtime.tv_sec;
+	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
+	tp->rx_opt.ts_recent_stamp = xtime.tv_sec;
 }
 
 static __inline__ void
 tcp_replace_ts_recent(struct tcp_opt *tp, u32 seq)
 {
-	if (tp->saw_tstamp && !after(seq, tp->rcv_wup)) {
+	if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
 		/* PAWS bug workaround wrt. ACK frames, the PAWS discard
 		 * extra check below makes sure this can only happen
 		 * for pure ACK frames.  -DaveM
@@ -3120,8 +3125,8 @@ tcp_replace_ts_recent(struct tcp_opt *tp
 		 * Not only, also it occurs for expired timestamps.
 		 */
 
-		if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0 ||
-		   xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
+		if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
+		   xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
 			tcp_store_ts_recent(tp);
 	}
 }
@@ -3162,16 +3167,16 @@ static int tcp_disordered_ack(struct tcp
 		ack == tp->snd_una &&
 
 		/* 3. ... and does not update window. */
-		!tcp_may_update_window(tp, ack, seq, ntohs(th->window)<<tp->snd_wscale) &&
+		!tcp_may_update_window(tp, ack, seq, ntohs(th->window)<<tp->rx_opt.snd_wscale) &&
 
 		/* 4. ... and sits in replay window. */
-		(s32)(tp->ts_recent - tp->rcv_tsval) <= (tp->rto*1024)/HZ);
+		(s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (tp->rto*1024)/HZ);
 }
 
 static __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct sk_buff *skb)
 {
-	return ((s32)(tp->ts_recent - tp->rcv_tsval) > TCP_PAWS_WINDOW &&
-		xtime.tv_sec < tp->ts_recent_stamp + TCP_PAWS_24DAYS &&
+	return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
+		xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
 		!tcp_disordered_ack(tp, skb));
 }
 
@@ -3284,8 +3289,8 @@ static void tcp_fin(struct sk_buff *skb,
 	 * Probably, we should reset in this case. For now drop them.
 	 */
 	__skb_queue_purge(&tp->out_of_order_queue);
-	if (tp->sack_ok)
-		tcp_sack_reset(tp);
+	if (tp->rx_opt.sack_ok)
+		tcp_sack_reset(&tp->rx_opt);
 	sk_stream_mem_reclaim(sk);
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
@@ -3315,22 +3320,22 @@ tcp_sack_extend(struct tcp_sack_block *s
 
 static __inline__ void tcp_dsack_set(struct tcp_opt *tp, u32 seq, u32 end_seq)
 {
-	if (tp->sack_ok && sysctl_tcp_dsack) {
+	if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
 		if (before(seq, tp->rcv_nxt))
 			NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT);
 		else
 			NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT);
 
-		tp->dsack = 1;
+		tp->rx_opt.dsack = 1;
 		tp->duplicate_sack[0].start_seq = seq;
 		tp->duplicate_sack[0].end_seq = end_seq;
-		tp->eff_sacks = min(tp->num_sacks+1, 4-tp->tstamp_ok);
+		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks+1, 4-tp->rx_opt.tstamp_ok);
 	}
 }
 
 static __inline__ void tcp_dsack_extend(struct tcp_opt *tp, u32 seq, u32 end_seq)
 {
-	if (!tp->dsack)
+	if (!tp->rx_opt.dsack)
 		tcp_dsack_set(tp, seq, end_seq);
 	else
 		tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
@@ -3345,7 +3350,7 @@ static void tcp_send_dupack(struct sock 
 		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
 		tcp_enter_quickack_mode(tp);
 
-		if (tp->sack_ok && sysctl_tcp_dsack) {
+		if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -3369,16 +3374,16 @@ static void tcp_sack_maybe_coalesce(stru
 	/* See if the recent change to the first SACK eats into
 	 * or hits the sequence space of other SACK blocks, if so coalesce.
 	 */
-	for (this_sack = 1; this_sack < tp->num_sacks; ) {
+	for (this_sack = 1; this_sack < tp->rx_opt.num_sacks; ) {
 		if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
 			int i;
 
 			/* Zap SWALK, by moving every further SACK up by one slot.
 			 * Decrease num_sacks.
 			 */
-			tp->num_sacks--;
-			tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok);
-			for(i=this_sack; i < tp->num_sacks; i++)
+			tp->rx_opt.num_sacks--;
+			tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
+			for(i=this_sack; i < tp->rx_opt.num_sacks; i++)
 				sp[i] = sp[i+1];
 			continue;
 		}
@@ -3403,7 +3408,7 @@ static void tcp_sack_new_ofo_skb(struct 
 {
 	struct tcp_opt *tp = tcp_sk(sk);
 	struct tcp_sack_block *sp = &tp->selective_acks[0];
-	int cur_sacks = tp->num_sacks;
+	int cur_sacks = tp->rx_opt.num_sacks;
 	int this_sack;
 
 	if (!cur_sacks)
@@ -3428,7 +3433,7 @@ static void tcp_sack_new_ofo_skb(struct 
 	 */
 	if (this_sack >= 4) {
 		this_sack--;
-		tp->num_sacks--;
+		tp->rx_opt.num_sacks--;
 		sp--;
 	}
 	for(; this_sack > 0; this_sack--, sp--)
@@ -3438,8 +3443,8 @@ new_sack:
 	/* Build the new head SACK, and we're done. */
 	sp->start_seq = seq;
 	sp->end_seq = end_seq;
-	tp->num_sacks++;
-	tp->eff_sacks = min(tp->num_sacks + tp->dsack, 4 - tp->tstamp_ok);
+	tp->rx_opt.num_sacks++;
+	tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
 }
 
 /* RCV.NXT advances, some SACKs should be eaten. */
@@ -3447,13 +3452,13 @@ new_sack:
 static void tcp_sack_remove(struct tcp_opt *tp)
 {
 	struct tcp_sack_block *sp = &tp->selective_acks[0];
-	int num_sacks = tp->num_sacks;
+	int num_sacks = tp->rx_opt.num_sacks;
 	int this_sack;
 
 	/* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
 	if (skb_queue_len(&tp->out_of_order_queue) == 0) {
-		tp->num_sacks = 0;
-		tp->eff_sacks = tp->dsack;
+		tp->rx_opt.num_sacks = 0;
+		tp->rx_opt.eff_sacks = tp->rx_opt.dsack;
 		return;
 	}
 
@@ -3474,9 +3479,9 @@ static void tcp_sack_remove(struct tcp_o
 		this_sack++;
 		sp++;
 	}
-	if (num_sacks != tp->num_sacks) {
-		tp->num_sacks = num_sacks;
-		tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok);
+	if (num_sacks != tp->rx_opt.num_sacks) {
+		tp->rx_opt.num_sacks = num_sacks;
+		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
 	}
 }
 
@@ -3534,10 +3539,10 @@ static void tcp_data_queue(struct sock *
 
 	TCP_ECN_accept_cwr(tp, skb);
 
-	if (tp->dsack) {
-		tp->dsack = 0;
-		tp->eff_sacks = min_t(unsigned int, tp->num_sacks,
-						    4 - tp->tstamp_ok);
+	if (tp->rx_opt.dsack) {
+		tp->rx_opt.dsack = 0;
+		tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks,
+						    4 - tp->rx_opt.tstamp_ok);
 	}
 
 	/*  Queue data for delivery to the user.
@@ -3574,7 +3579,7 @@ queue_and_out:
 			     !sk_stream_rmem_schedule(sk, skb))) {
 				if (tcp_prune_queue(sk) < 0 ||
 				    !sk_stream_rmem_schedule(sk, skb))
-					goto drop;
+					goto drop_part;
 			}
 			sk_stream_set_owner_r(skb, sk);
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
@@ -3595,7 +3600,7 @@ queue_and_out:
 				tp->ack.pingpong = 0;
 		}
 
-		if (tp->num_sacks)
+		if (tp->rx_opt.num_sacks)
 			tcp_sack_remove(tp);
 
 		tcp_fast_path_check(sk, tp);
@@ -3618,6 +3623,12 @@ out_of_window:
 drop:
 		__kfree_skb(skb);
 		return;
+
+drop_part:
+		if (after(tp->copied_seq, tp->rcv_nxt))
+			tp->rcv_nxt = tp->copied_seq;
+		__kfree_skb(skb);
+		return;
 	}
 
 	/* Out of window. F.e. zero window probe. */
@@ -3662,10 +3673,10 @@ drop:
 
 	if (!skb_peek(&tp->out_of_order_queue)) {
 		/* Initial out of order segment, build 1 SACK. */
-		if (tp->sack_ok) {
-			tp->num_sacks = 1;
-			tp->dsack     = 0;
-			tp->eff_sacks = 1;
+		if (tp->rx_opt.sack_ok) {
+			tp->rx_opt.num_sacks = 1;
+			tp->rx_opt.dsack     = 0;
+			tp->rx_opt.eff_sacks = 1;
 			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
 			tp->selective_acks[0].end_seq =
 						TCP_SKB_CB(skb)->end_seq;
@@ -3679,7 +3690,7 @@ drop:
 		if (seq == TCP_SKB_CB(skb1)->end_seq) {
 			__skb_append(skb1, skb);
 
-			if (!tp->num_sacks ||
+			if (!tp->rx_opt.num_sacks ||
 			    tp->selective_acks[0].end_seq != seq)
 				goto add_sack;
 
@@ -3727,7 +3738,7 @@ drop:
 		}
 
 add_sack:
-		if (tp->sack_ok)
+		if (tp->rx_opt.sack_ok)
 			tcp_sack_new_ofo_skb(sk, seq, end_seq);
 	}
 }
@@ -3789,6 +3800,10 @@ tcp_collapse(struct sock *sk, struct sk_
 		nskb = alloc_skb(copy+header, GFP_ATOMIC);
 		if (!nskb)
 			return;
+		if (ub_tcprcvbuf_charge_forced(skb->sk, nskb) < 0) {
+			kfree_skb(nskb);
+			return;
+		}
 		skb_reserve(nskb, header);
 		memcpy(nskb->head, skb->head, header);
 		nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
@@ -3884,7 +3899,7 @@ static int tcp_prune_queue(struct sock *
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		tcp_clamp_window(sk, tp);
-	else if (tcp_memory_pressure)
+	else if (ub_tcp_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
 	tcp_collapse_ofo_queue(sk);
@@ -3910,8 +3925,8 @@ static int tcp_prune_queue(struct sock *
 		 * is in a sad state like this, we care only about integrity
 		 * of the connection not performance.
 		 */
-		if (tp->sack_ok)
-			tcp_sack_reset(tp);
+		if (tp->rx_opt.sack_ok)
+			tcp_sack_reset(&tp->rx_opt);
 		sk_stream_mem_reclaim(sk);
 	}
 
@@ -3966,7 +3981,7 @@ static void tcp_new_space(struct sock *s
 	    !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
 	    !tcp_memory_pressure &&
 	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
- 		int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache_std) +
+ 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache_std) +
 			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
 		    demanded = max_t(unsigned int, tp->snd_cwnd,
 						   tp->reordering + 1);
@@ -4233,7 +4248,7 @@ int tcp_rcv_established(struct sock *sk,
 	 *	We do checksum and copy also but from device to kernel.
 	 */
 
-	tp->saw_tstamp = 0;
+	tp->rx_opt.saw_tstamp = 0;
 
 	/*	pred_flags is 0xS?10 << 16 + snd_wnd
 	 *	if header_predition is to be made
@@ -4262,14 +4277,14 @@ int tcp_rcv_established(struct sock *sk,
 					  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
 				goto slow_path;
 
-			tp->saw_tstamp = 1;
+			tp->rx_opt.saw_tstamp = 1;
 			++ptr; 
-			tp->rcv_tsval = ntohl(*ptr);
+			tp->rx_opt.rcv_tsval = ntohl(*ptr);
 			++ptr;
-			tp->rcv_tsecr = ntohl(*ptr);
+			tp->rx_opt.rcv_tsecr = ntohl(*ptr);
 
 			/* If PAWS failed, check it more carefully in slow path */
-			if ((s32)(tp->rcv_tsval - tp->ts_recent) < 0)
+			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
 				goto slow_path;
 
 			/* DO NOT update ts_recent here, if checksum fails
@@ -4349,6 +4364,10 @@ int tcp_rcv_established(struct sock *sk,
 
 				if ((int)skb->truesize > sk->sk_forward_alloc)
 					goto step5;
+				/* This is OK not to try to free memory here.
+				 * Do this below on slow path. Den */
+				if (ub_tcprcvbuf_charge(sk, skb) < 0)
+					goto step5;
 
 				NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
 
@@ -4395,7 +4414,7 @@ slow_path:
 	/*
 	 * RFC1323: H1. Apply PAWS check first.
 	 */
-	if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp &&
+	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
 	    tcp_paws_discard(tp, skb)) {
 		if (!th->rst) {
 			NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
@@ -4467,9 +4486,9 @@ static int tcp_rcv_synsent_state_process
 					 struct tcphdr *th, unsigned len)
 {
 	struct tcp_opt *tp = tcp_sk(sk);
-	int saved_clamp = tp->mss_clamp;
+	int saved_clamp = tp->rx_opt.mss_clamp;
 
-	tcp_parse_options(skb, tp, 0);
+	tcp_parse_options(skb, &tp->rx_opt, 0);
 
 	if (th->ack) {
 		/* rfc793:
@@ -4486,8 +4505,8 @@ static int tcp_rcv_synsent_state_process
 		if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
 			goto reset_and_undo;
 
-		if (tp->saw_tstamp && tp->rcv_tsecr &&
-		    !between(tp->rcv_tsecr, tp->retrans_stamp,
+		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
 			     tcp_time_stamp)) {
 			NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
 			goto reset_and_undo;
@@ -4542,13 +4561,13 @@ static int tcp_rcv_synsent_state_process
 		tp->snd_wnd = ntohs(th->window);
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);
 
-		if (!tp->wscale_ok) {
-			tp->snd_wscale = tp->rcv_wscale = 0;
+		if (!tp->rx_opt.wscale_ok) {
+			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
 			tp->window_clamp = min(tp->window_clamp, 65535U);
 		}
 
-		if (tp->saw_tstamp) {
-			tp->tstamp_ok	   = 1;
+		if (tp->rx_opt.saw_tstamp) {
+			tp->rx_opt.tstamp_ok	   = 1;
 			tp->tcp_header_len =
 				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
 			tp->advmss	    -= TCPOLEN_TSTAMP_ALIGNED;
@@ -4557,8 +4576,8 @@ static int tcp_rcv_synsent_state_process
 			tp->tcp_header_len = sizeof(struct tcphdr);
 		}
 
-		if (tp->sack_ok && sysctl_tcp_fack)
-			tp->sack_ok |= 2;
+		if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
+			tp->rx_opt.sack_ok |= 2;
 
 		tcp_sync_mss(sk, tp->pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
@@ -4585,7 +4604,7 @@ static int tcp_rcv_synsent_state_process
 		if (sock_flag(sk, SOCK_KEEPOPEN))
 			tcp_reset_keepalive_timer(sk, keepalive_time_when(tp));
 
-		if (!tp->snd_wscale)
+		if (!tp->rx_opt.snd_wscale)
 			__tcp_fast_path_on(tp, tp->snd_wnd);
 		else
 			tp->pred_flags = 0;
@@ -4632,7 +4651,7 @@ discard:
 	}
 
 	/* PAWS check. */
-	if (tp->ts_recent_stamp && tp->saw_tstamp && tcp_paws_check(tp, 0))
+	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0))
 		goto discard_and_undo;
 
 	if (th->syn) {
@@ -4642,8 +4661,8 @@ discard:
 		 */
 		tcp_set_state(sk, TCP_SYN_RECV);
 
-		if (tp->saw_tstamp) {
-			tp->tstamp_ok = 1;
+		if (tp->rx_opt.saw_tstamp) {
+			tp->rx_opt.tstamp_ok = 1;
 			tcp_store_ts_recent(tp);
 			tp->tcp_header_len =
 				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
@@ -4690,13 +4709,13 @@ discard:
 	 */
 
 discard_and_undo:
-	tcp_clear_options(tp);
-	tp->mss_clamp = saved_clamp;
+	tcp_clear_options(&tp->rx_opt);
+	tp->rx_opt.mss_clamp = saved_clamp;
 	goto discard;
 
 reset_and_undo:
-	tcp_clear_options(tp);
-	tp->mss_clamp = saved_clamp;
+	tcp_clear_options(&tp->rx_opt);
+	tp->rx_opt.mss_clamp = saved_clamp;
 	return 1;
 }
 
@@ -4714,7 +4733,7 @@ int tcp_rcv_state_process(struct sock *s
 	struct tcp_opt *tp = tcp_sk(sk);
 	int queued = 0;
 
-	tp->saw_tstamp = 0;
+	tp->rx_opt.saw_tstamp = 0;
 
 	switch (sk->sk_state) {
 	case TCP_CLOSE:
@@ -4771,7 +4790,7 @@ int tcp_rcv_state_process(struct sock *s
 		return 0;
 	}
 
-	if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp &&
+	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
 	    tcp_paws_discard(tp, skb)) {
 		if (!th->rst) {
 			NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
@@ -4831,7 +4850,7 @@ int tcp_rcv_state_process(struct sock *s
 
 				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
 				tp->snd_wnd = ntohs(th->window) <<
-					      tp->snd_wscale;
+					      tp->rx_opt.snd_wscale;
 				tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq,
 					    TCP_SKB_CB(skb)->seq);
 
@@ -4839,11 +4858,11 @@ int tcp_rcv_state_process(struct sock *s
 				 * and does not calculate rtt.
 				 * Fix it at least with timestamps.
 				 */
-				if (tp->saw_tstamp && tp->rcv_tsecr &&
+				if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 				    !tp->srtt)
 					tcp_ack_saw_tstamp(sk, 0);
 
-				if (tp->tstamp_ok)
+				if (tp->rx_opt.tstamp_ok)
 					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
 
 				/* Make sure socket is routed, for
diff -Nurap linux-2.6.9-100.orig/net/ipv4/tcp_ipv4.c linux-2.6.9-ve023stab054/net/ipv4/tcp_ipv4.c
--- linux-2.6.9-100.orig/net/ipv4/tcp_ipv4.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/tcp_ipv4.c	2011-06-15 19:26:21.000000000 +0400
@@ -69,12 +69,16 @@
 #include <net/inet_common.h>
 #include <net/xfrm.h>
 
+#include <ub/ub_tcp.h>
+
 #include <linux/inet.h>
 #include <linux/ipv6.h>
 #include <linux/stddef.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
+#include <linux/ve_owner.h>
+
 extern int sysctl_ip_dynaddr;
 int sysctl_tcp_tw_reuse;
 int sysctl_tcp_low_latency;
@@ -105,9 +109,10 @@ int sysctl_local_port_range[2] = { 1024,
 int tcp_port_rover = 1024 - 1;
 
 static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
-				 __u32 faddr, __u16 fport)
+				 __u32 faddr, __u16 fport,
+				 envid_t veid)
 {
-	int h = (laddr ^ lport) ^ (faddr ^ fport);
+	int h = (laddr ^ lport) ^ (faddr ^ fport) ^ (veid ^ (veid >> 16));
 	h ^= h >> 16;
 	h ^= h >> 8;
 	return h & (tcp_ehash_size - 1);
@@ -120,15 +125,20 @@ static __inline__ int tcp_sk_hashfn(stru
 	__u16 lport = inet->num;
 	__u32 faddr = inet->daddr;
 	__u16 fport = inet->dport;
+	envid_t veid = VEID(VE_OWNER_SK(sk));
 
-	return tcp_hashfn(laddr, lport, faddr, fport);
+	return tcp_hashfn(laddr, lport, faddr, fport, veid);
 }
 
+DCL_VE_OWNER(TB, GENERIC, struct tcp_bind_bucket, owner_env,
+						inline, (always_inline))
+
 /* Allocate and initialize a new TCP local port bind bucket.
  * The bindhash mutex for snum's hash chain must be held here.
  */
 struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
-					  unsigned short snum)
+					  unsigned short snum,
+					  struct ve_struct *env)
 {
 	struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep,
 						      SLAB_ATOMIC);
@@ -136,6 +146,7 @@ struct tcp_bind_bucket *tcp_bucket_creat
 		tb->port = snum;
 		tb->fastreuse = 0;
 		INIT_HLIST_HEAD(&tb->owners);
+		SET_VE_OWNER_TB(tb, env);
 		hlist_add_head(&tb->node, &head->chain);
 	}
 	return tb;
@@ -153,10 +164,11 @@ void tcp_bucket_destroy(struct tcp_bind_
 /* Caller must disable local BH processing. */
 static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
 {
-	struct tcp_bind_hashbucket *head =
-				&tcp_bhash[tcp_bhashfn(inet_sk(child)->num)];
+	struct tcp_bind_hashbucket *head;
 	struct tcp_bind_bucket *tb;
 
+	head = &tcp_bhash[tcp_bhashfn(inet_sk(child)->num,
+					VEID(VE_OWNER_SK(child)))];
 	spin_lock(&head->lock);
 	tb = tcp_sk(sk)->bind_hash;
 	sk_add_bind_node(child, &tb->owners);
@@ -212,8 +224,10 @@ static int tcp_v4_get_port(struct sock *
 	struct tcp_bind_hashbucket *head;
 	struct hlist_node *node;
 	struct tcp_bind_bucket *tb;
+	struct ve_struct *env;
 	int ret;
 
+	env = VE_OWNER_SK(sk);
 	local_bh_disable();
 	if (!snum) {
 		int low = sysctl_local_port_range[0];
@@ -221,16 +235,21 @@ static int tcp_v4_get_port(struct sock *
 		int remaining = (high - low) + 1;
 		int rover;
 
+		/* Below we treat low > high as high == low. So do here. Den */
+		if (remaining < 1)
+			remaining = 1;
+
 		spin_lock(&tcp_portalloc_lock);
 		rover = tcp_port_rover;
 		do {
 			rover++;
 			if (rover < low || rover > high)
 				rover = low;
-			head = &tcp_bhash[tcp_bhashfn(rover)];
+			head = &tcp_bhash[tcp_bhashfn(rover, VEID(env))];
 			spin_lock(&head->lock);
 			tb_for_each(tb, node, &head->chain)
-				if (tb->port == rover)
+				if (tb->port == rover &&
+				    ve_accessible_strict(VE_OWNER_TB(tb), env))
 					goto next;
 			break;
 		next:
@@ -249,10 +268,11 @@ static int tcp_v4_get_port(struct sock *
 		 */
 		snum = rover;
 	} else {
-		head = &tcp_bhash[tcp_bhashfn(snum)];
+		head = &tcp_bhash[tcp_bhashfn(snum, VEID(env))];
 		spin_lock(&head->lock);
 		tb_for_each(tb, node, &head->chain)
-			if (tb->port == snum)
+			if (tb->port == snum &&
+			    ve_accessible_strict(VE_OWNER_TB(tb), env))
 				goto tb_found;
 	}
 	tb = NULL;
@@ -272,7 +292,7 @@ tb_found:
 	}
 tb_not_found:
 	ret = 1;
-	if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
+	if (!tb && (tb = tcp_bucket_create(head, snum, env)) == NULL)
 		goto fail_unlock;
 	if (hlist_empty(&tb->owners)) {
 		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
@@ -301,9 +321,10 @@ fail:
 static void __tcp_put_port(struct sock *sk)
 {
 	struct inet_opt *inet = inet_sk(sk);
-	struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
+	struct tcp_bind_hashbucket *head;
 	struct tcp_bind_bucket *tb;
 
+	head = &tcp_bhash[tcp_bhashfn(inet->num, VEID(VE_OWNER_SK(sk)))];
 	spin_lock(&head->lock);
 	tb = tcp_sk(sk)->bind_hash;
 	__sk_del_bind_node(sk);
@@ -412,7 +433,8 @@ void tcp_unhash(struct sock *sk)
  * during the search since they can never be otherwise.
  */
 static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
-					     unsigned short hnum, int dif)
+					     unsigned short hnum, int dif,
+					     struct ve_struct *env)
 {
 	struct sock *result = NULL, *sk;
 	struct hlist_node *node;
@@ -422,7 +444,9 @@ static struct sock *__tcp_v4_lookup_list
 	sk_for_each(sk, node, head) {
 		struct inet_opt *inet = inet_sk(sk);
 
-		if (inet->num == hnum && !ipv6_only_sock(sk)) {
+		if (inet->num == hnum &&
+		    ve_accessible_strict(VE_OWNER_SK(sk), env) &&
+		    !ipv6_only_sock(sk)) {
 			__u32 rcv_saddr = inet->rcv_saddr;
 
 			score = (sk->sk_family == PF_INET ? 1 : 0);
@@ -453,18 +477,21 @@ inline struct sock *tcp_v4_lookup_listen
 {
 	struct sock *sk = NULL;
 	struct hlist_head *head;
+	struct ve_struct *env;
 
+	env = get_exec_env();
 	read_lock(&tcp_lhash_lock);
-	head = &tcp_listening_hash[tcp_lhashfn(hnum)];
+	head = &tcp_listening_hash[tcp_lhashfn(hnum, VEID(env))];
 	if (!hlist_empty(head)) {
 		struct inet_opt *inet = inet_sk((sk = __sk_head(head)));
 
 		if (inet->num == hnum && !sk->sk_node.next &&
+		    ve_accessible_strict(VE_OWNER_SK(sk), env) &&
 		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
 		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
 		    !sk->sk_bound_dev_if)
 			goto sherry_cache;
-		sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
+		sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif, env);
 	}
 	if (sk) {
 sherry_cache:
@@ -492,17 +519,22 @@ static inline struct sock *__tcp_v4_look
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
-	int hash = tcp_hashfn(daddr, hnum, saddr, sport);
+	int hash;
+	struct ve_struct *env;
+
+	env = get_exec_env();
+	hash = tcp_hashfn(daddr, hnum, saddr, sport, VEID(env));
 	head = &tcp_ehash[hash];
 	read_lock(&head->lock);
 	sk_for_each(sk, node, &head->chain) {
-		if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
+		if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif, env))
 			goto hit; /* You sunk my battleship! */
 	}
 
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
 	sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
-		if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
+		if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr,
+							ports, dif, env))
 			goto hit;
 	}
 	sk = NULL;
@@ -553,11 +585,16 @@ static int __tcp_v4_check_established(st
 	int dif = sk->sk_bound_dev_if;
 	TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
 	__u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
-	int hash = tcp_hashfn(daddr, lport, saddr, inet->dport);
-	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
+	int hash;
+	struct tcp_ehash_bucket *head;
 	struct sock *sk2;
 	struct hlist_node *node;
 	struct tcp_tw_bucket *tw;
+	struct ve_struct *env;
+
+	env = VE_OWNER_SK(sk);
+	hash = tcp_hashfn(daddr, lport, saddr, inet->dport, VEID(env));
+	head = &tcp_ehash[hash];
 
 	write_lock(&head->lock);
 
@@ -565,7 +602,8 @@ static int __tcp_v4_check_established(st
 	sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
 		tw = (struct tcp_tw_bucket *)sk2;
 
-		if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
+		if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr,
+							ports, dif, env)) {
 			struct tcp_opt *tp = tcp_sk(sk);
 
 			/* With PAWS, it is safe from the viewpoint
@@ -589,8 +627,8 @@ static int __tcp_v4_check_established(st
 				if ((tp->write_seq =
 						tw->tw_snd_nxt + 65535 + 2) == 0)
 					tp->write_seq = 1;
-				tp->ts_recent	    = tw->tw_ts_recent;
-				tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
+				tp->rx_opt.ts_recent	   = tw->tw_ts_recent;
+				tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
 				sock_hold(sk2);
 				goto unique;
 			} else
@@ -601,7 +639,7 @@ static int __tcp_v4_check_established(st
 
 	/* And established part... */
 	sk_for_each(sk2, node, &head->chain) {
-		if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+		if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif, env))
 			goto not_unique;
 	}
 
@@ -643,7 +681,9 @@ static int tcp_v4_hash_connect(struct so
  	struct tcp_bind_hashbucket *head;
  	struct tcp_bind_bucket *tb;
 	int ret;
+	struct ve_struct *env;
 
+	env = VE_OWNER_SK(sk);
  	if (!snum) {
  		int rover;
  		int low = sysctl_local_port_range[0];
@@ -674,7 +714,7 @@ static int tcp_v4_hash_connect(struct so
  			rover++;
  			if ((rover < low) || (rover > high))
  				rover = low;
- 			head = &tcp_bhash[tcp_bhashfn(rover)];
+ 			head = &tcp_bhash[tcp_bhashfn(rover, VEID(env))];
  			spin_lock(&head->lock);
 
  			/* Does not bother with rcv_saddr checks,
@@ -682,7 +722,9 @@ static int tcp_v4_hash_connect(struct so
  			 * unique enough.
  			 */
 			tb_for_each(tb, node, &head->chain) {
- 				if (tb->port == rover) {
+				if (tb->port == rover &&
+				    ve_accessible_strict(VE_OWNER_TB(tb), env))
+				{
  					BUG_TRAP(!hlist_empty(&tb->owners));
  					if (tb->fastreuse >= 0)
  						goto next_port;
@@ -694,7 +736,7 @@ static int tcp_v4_hash_connect(struct so
  				}
  			}
 
- 			tb = tcp_bucket_create(head, rover);
+ 			tb = tcp_bucket_create(head, rover, env);
  			if (!tb) {
  				spin_unlock(&head->lock);
  				break;
@@ -733,7 +775,7 @@ ok:
 		goto out;
  	}
 
- 	head  = &tcp_bhash[tcp_bhashfn(snum)];
+ 	head  = &tcp_bhash[tcp_bhashfn(snum, VEID(env))];
  	tb  = tcp_sk(sk)->bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
@@ -796,25 +838,25 @@ int tcp_v4_connect(struct sock *sk, stru
 		inet->saddr = rt->rt_src;
 	inet->rcv_saddr = inet->saddr;
 
-	if (tp->ts_recent_stamp && inet->daddr != daddr) {
+	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
 		/* Reset inherited state */
-		tp->ts_recent	    = 0;
-		tp->ts_recent_stamp = 0;
-		tp->write_seq	    = 0;
+		tp->rx_opt.ts_recent	   = 0;
+		tp->rx_opt.ts_recent_stamp = 0;
+		tp->write_seq		   = 0;
 	}
 
 	if (sysctl_tcp_tw_recycle &&
-	    !tp->ts_recent_stamp && rt->rt_dst == daddr) {
+	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
 		struct inet_peer *peer = rt_get_peer(rt);
 
 		/* VJ's idea. We save last timestamp seen from
 		 * the destination in peer table, when entering state TIME-WAIT
-		 * and initialize ts_recent from it, when trying new connection.
+		 * and initialize rx_opt.ts_recent from it, when trying new connection.
 		 */
 
 		if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
-			tp->ts_recent_stamp = peer->tcp_ts_stamp;
-			tp->ts_recent = peer->tcp_ts;
+			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+			tp->rx_opt.ts_recent = peer->tcp_ts;
 		}
 	}
 
@@ -825,7 +867,7 @@ int tcp_v4_connect(struct sock *sk, stru
 	if (inet->opt)
 		tp->ext_header_len = inet->opt->optlen;
 
-	tp->mss_clamp = 536;
+	tp->rx_opt.mss_clamp = 536;
 
 	/* Socket identity is still unknown (sport may be zero).
 	 * However we set state to SYN-SENT and not releasing socket
@@ -1260,9 +1302,8 @@ static void tcp_v4_timewait_ack(struct s
 	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
 
 	tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
-			tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
-
-	tcp_tw_put(tw);
+			tw->tw_rcv_wnd >> (tw->tw_rcv_wscale & TW_WSCALE_MASK),
+			tw->tw_ts_recent);
 }
 
 static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
@@ -1403,10 +1444,11 @@ struct or_calltable or_ipv4 = {
 	.destructor	=	tcp_v4_or_free,
 	.send_reset	=	tcp_v4_send_reset,
 };
+EXPORT_SYMBOL(or_ipv4);
 
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcp_opt tp;
+	struct tcp_options_received tmp_opt;
 	struct open_request *req;
 	__u32 saddr = skb->nh.iph->saddr;
 	__u32 daddr = skb->nh.iph->daddr;
@@ -1448,29 +1490,29 @@ int tcp_v4_conn_request(struct sock *sk,
 	if (!req)
 		goto drop;
 
-	tcp_clear_options(&tp);
-	tp.mss_clamp = 536;
-	tp.user_mss  = tcp_sk(sk)->user_mss;
+	tcp_clear_options(&tmp_opt);
+	tmp_opt.mss_clamp = 536;
+	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
 
-	tcp_parse_options(skb, &tp, 0);
+	tcp_parse_options(skb, &tmp_opt, 0);
 
 	if (want_cookie) {
-		tcp_clear_options(&tp);
-		tp.saw_tstamp = 0;
+		tcp_clear_options(&tmp_opt);
+		tmp_opt.saw_tstamp = 0;
 	}
 
-	if (tp.saw_tstamp && !tp.rcv_tsval) {
+	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
 		/* Some OSes (unknown ones, but I see them on web server, which
 		 * contains information interesting only for windows'
 		 * users) do not send their stamp in SYN. It is easy case.
 		 * We simply do not advertise TS support.
 		 */
-		tp.saw_tstamp = 0;
-		tp.tstamp_ok  = 0;
+		tmp_opt.saw_tstamp = 0;
+		tmp_opt.tstamp_ok  = 0;
 	}
-	tp.tstamp_ok = tp.saw_tstamp;
+	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 
-	tcp_openreq_init(req, &tp, skb);
+	tcp_openreq_init(req, &tmp_opt, skb);
 
 	req->af.v4_req.loc_addr = daddr;
 	req->af.v4_req.rmt_addr = saddr;
@@ -1496,7 +1538,7 @@ int tcp_v4_conn_request(struct sock *sk,
 		 * timewait bucket, so that all the necessary checks
 		 * are made in the function processing timewait state.
 		 */
-		if (tp.saw_tstamp &&
+		if (tmp_opt.saw_tstamp &&
 		    sysctl_tcp_tw_recycle &&
 		    (dst = tcp_v4_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
@@ -1683,12 +1725,15 @@ static int tcp_v4_checksum_init(struct s
  */
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
+	struct user_beancounter *ub;
+
+	ub = set_sk_exec_ub(sk);
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		TCP_CHECK_TIMER(sk);
 		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
 			goto reset;
 		TCP_CHECK_TIMER(sk);
-		return 0;
+		goto restore_context;
 	}
 
 	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
@@ -1702,7 +1747,7 @@ int tcp_v4_do_rcv(struct sock *sk, struc
 		if (nsk != sk) {
 			if (tcp_child_process(sk, nsk, skb))
 				goto reset;
-			return 0;
+			goto restore_context;
 		}
 	}
 
@@ -1710,6 +1755,9 @@ int tcp_v4_do_rcv(struct sock *sk, struc
 	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
 		goto reset;
 	TCP_CHECK_TIMER(sk);
+
+restore_context:
+	(void)set_exec_ub(ub);
 	return 0;
 
 reset:
@@ -1721,7 +1769,7 @@ discard:
 	 * might be destroyed here. This current version compiles correctly,
 	 * but you have been warned.
 	 */
-	return 0;
+	goto restore_context;
 
 csum_err:
 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
@@ -1834,13 +1882,17 @@ do_time_wait:
 		tcp_tw_put((struct tcp_tw_bucket *) sk);
 		goto discard_it;
 	}
+	spin_lock(&((struct tcp_tw_bucket *)sk)->tw_lock);
 	switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
 					   skb, th, skb->len)) {
 	case TCP_TW_SYN: {
-		struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
+		struct sock *sk2;
+
+		sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
 							  ntohs(th->dest),
 							  tcp_v4_iif(skb));
 		if (sk2) {
+			spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock);
 			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
 			tcp_tw_put((struct tcp_tw_bucket *)sk);
 			sk = sk2;
@@ -1852,9 +1904,13 @@ do_time_wait:
 		tcp_v4_timewait_ack(sk, skb);
 		break;
 	case TCP_TW_RST:
+		spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock);
+		tcp_tw_put((struct tcp_tw_bucket *)sk);
 		goto no_tcp_socket;
 	case TCP_TW_SUCCESS:;
 	}
+	spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock);
+	tcp_tw_put((struct tcp_tw_bucket *)sk);
 	goto discard_it;
 }
 
@@ -2000,11 +2056,11 @@ int tcp_v4_remember_stamp(struct sock *s
 	}
 
 	if (peer) {
-		if ((s32)(peer->tcp_ts - tp->ts_recent) <= 0 ||
+		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
-		     peer->tcp_ts_stamp <= tp->ts_recent_stamp)) {
-			peer->tcp_ts_stamp = tp->ts_recent_stamp;
-			peer->tcp_ts = tp->ts_recent;
+		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
+			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
+			peer->tcp_ts = tp->rx_opt.ts_recent;
 		}
 		if (release_it)
 			inet_putpeer(peer);
@@ -2076,6 +2132,8 @@ static int tcp_v4_init_sock(struct sock 
 	tp->snd_cwnd_clamp = ~0;
 	tp->mss_cache_std = tp->mss_cache = 536;
 
+	tp->advmss = 65535; /* max value */
+
 	tp->reordering = sysctl_tcp_reordering;
 
 	sk->sk_state = TCP_CLOSE;
@@ -2116,6 +2174,8 @@ int tcp_v4_destroy_sock(struct sock *sk)
 	 * If sendmsg cached page exists, toss it.
 	 */
 	if (sk->sk_sndmsg_page) {
+		/* queue is empty, uncharge */
+		ub_sock_tcp_detachpage(sk);
 		__free_page(sk->sk_sndmsg_page);
 		sk->sk_sndmsg_page = NULL;
 	}
@@ -2130,16 +2190,34 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
 #ifdef CONFIG_PROC_FS
 /* Proc filesystem TCP sock list dumping. */
 
-static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head)
+static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head,
+		envid_t veid)
 {
-	return hlist_empty(head) ? NULL :
-		list_entry(head->first, struct tcp_tw_bucket, tw_node);
+	struct tcp_tw_bucket *tw;
+	struct hlist_node *pos;
+
+	if (hlist_empty(head))
+		return NULL;
+	hlist_for_each_entry(tw, pos, head, tw_node) {
+		if (!ve_accessible_veid(TW_VEID(tw), veid))
+			continue;
+		return tw;
+	}
+	return NULL;
 }
 
-static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw)
+static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw,
+		envid_t veid)
 {
-	return tw->tw_node.next ?
-		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
+	while (1) {
+		if (tw->tw_node.next == NULL)
+			return NULL;
+		tw = hlist_entry(tw->tw_node.next, typeof(*tw), tw_node);
+		if (!ve_accessible_veid(TW_VEID(tw), veid))
+			continue;
+		return tw;
+	}
+	return NULL;	/* make compiler happy */
 }
 
 static void *listening_get_next(struct seq_file *seq, void *cur)
@@ -2148,7 +2226,9 @@ static void *listening_get_next(struct s
 	struct hlist_node *node;
 	struct sock *sk = cur;
 	struct tcp_iter_state* st = seq->private;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	if (!sk) {
 		st->bucket = 0;
 		sk = sk_head(&tcp_listening_hash[0]);
@@ -2188,6 +2268,8 @@ get_req:
 	}
 get_sk:
 	sk_for_each_from(sk, node) {
+		if (!ve_accessible(VE_OWNER_SK(sk), ve))
+			continue;
 		if (sk->sk_family == st->family) {
 			cur = sk;
 			goto out;
@@ -2228,7 +2310,9 @@ static void *established_get_first(struc
 {
 	struct tcp_iter_state* st = seq->private;
 	void *rc = NULL;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) {
 		struct sock *sk;
 		struct hlist_node *node;
@@ -2236,6 +2320,8 @@ static void *established_get_first(struc
 	       
 		read_lock(&tcp_ehash[st->bucket].lock);
 		sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
+			if (!ve_accessible(VE_OWNER_SK(sk), ve))
+				continue;
 			if (sk->sk_family != st->family) {
 				continue;
 			}
@@ -2245,6 +2331,8 @@ static void *established_get_first(struc
 		st->state = TCP_SEQ_STATE_TIME_WAIT;
 		tw_for_each(tw, node,
 			    &tcp_ehash[st->bucket + tcp_ehash_size].chain) {
+			if (!ve_accessible_veid(TW_VEID(tw), VEID(ve)))
+				continue;
 			if (tw->tw_family != st->family) {
 				continue;
 			}
@@ -2264,16 +2352,17 @@ static void *established_get_next(struct
 	struct tcp_tw_bucket *tw;
 	struct hlist_node *node;
 	struct tcp_iter_state* st = seq->private;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	++st->num;
 
 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
 		tw = cur;
-		tw = tw_next(tw);
+		tw = tw_next(tw, VEID(ve));
 get_tw:
-		while (tw && tw->tw_family != st->family) {
-			tw = tw_next(tw);
-		}
+		while (tw && tw->tw_family != st->family)
+			tw = tw_next(tw, VEID(ve));
 		if (tw) {
 			cur = tw;
 			goto out;
@@ -2291,12 +2380,14 @@ get_tw:
 		sk = sk_next(sk);
 
 	sk_for_each_from(sk, node) {
+		if (!ve_accessible(VE_OWNER_SK(sk), ve))
+			continue;
 		if (sk->sk_family == st->family)
 			goto found;
 	}
 
 	st->state = TCP_SEQ_STATE_TIME_WAIT;
-	tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain);
+	tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain, VEID(ve));
 	goto get_tw;
 found:
 	cur = sk;
@@ -2643,6 +2734,85 @@ void __init tcp_v4_init(struct net_proto
 	tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
 }
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+static void tcp_kill_ve_onesk(struct sock *sk)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+
+	/* Check the assumed state of the socket. */
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		static int printed;
+invalid:
+		if (!printed)
+			printk(KERN_DEBUG "Killing sk: dead %d, state %d, "
+				"wrseq %u unseq %u, wrqu %d.\n",
+				sock_flag(sk, SOCK_DEAD), sk->sk_state,
+				tp->write_seq, tp->snd_una,
+				!skb_queue_empty(&sk->sk_write_queue));
+		printed = 1;
+		return;
+	}
+
+	tcp_send_active_reset(sk, GFP_ATOMIC);
+	switch (sk->sk_state) {
+		case TCP_FIN_WAIT1:
+		case TCP_CLOSING:
+			/* In these 2 states the peer may want us to retransmit
+			 * some data and/or FIN.  Entering "resetting mode"
+			 * instead.
+			 */
+			tcp_time_wait(sk, TCP_CLOSE, 0);
+			break;
+		case TCP_FIN_WAIT2:
+			/* By some reason the socket may stay in this state
+			 * without turning into a TW bucket.  Fix it.
+			 */
+			tcp_time_wait(sk, TCP_FIN_WAIT2, 0);
+			break;
+		case TCP_LAST_ACK:
+			/* Just jump into CLOSED state. */
+			tcp_done(sk);
+			break;
+		default:
+			/* The socket must be already close()d. */
+			goto invalid;
+	}
+}
+
+void tcp_v4_kill_ve_sockets(struct ve_struct *envid)
+{
+	struct tcp_ehash_bucket *head;
+	int i;
+
+	/* alive */
+	local_bh_disable();
+	head = tcp_ehash;
+	for (i = 0; i < tcp_ehash_size; i++) {
+		struct sock *sk;
+		struct hlist_node *node;
+more_work:
+		write_lock(&head[i].lock);
+		sk_for_each(sk, node, &head[i].chain) {
+			if (ve_accessible_strict(VE_OWNER_SK(sk), envid)) {
+				sock_hold(sk);
+				write_unlock(&head[i].lock);
+
+				bh_lock_sock(sk);
+				/* sk might have disappeared from the hash before
+				 * we got the lock */
+				if (sk->sk_state != TCP_CLOSE)
+					tcp_kill_ve_onesk(sk);
+				bh_unlock_sock(sk);
+				sock_put(sk);
+				goto more_work;
+			}
+		}
+		write_unlock(&head[i].lock);
+	}
+	local_bh_enable();
+}
+#endif
+
 EXPORT_SYMBOL(ipv4_specific);
 EXPORT_SYMBOL(tcp_bind_hash);
 EXPORT_SYMBOL(tcp_bucket_create);
@@ -2661,6 +2831,7 @@ EXPORT_SYMBOL(tcp_v4_rebuild_header);
 EXPORT_SYMBOL(tcp_v4_remember_stamp);
 EXPORT_SYMBOL(tcp_v4_send_check);
 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
+EXPORT_SYMBOL(tcp_v4_kill_ve_sockets);
 
 #ifdef CONFIG_PROC_FS
 EXPORT_SYMBOL(tcp_proc_register);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/tcp_minisocks.c linux-2.6.9-ve023stab054/net/ipv4/tcp_minisocks.c
--- linux-2.6.9-100.orig/net/ipv4/tcp_minisocks.c	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/tcp_minisocks.c	2011-06-15 19:26:20.000000000 +0400
@@ -29,6 +29,9 @@
 #include <net/inet_common.h>
 #include <net/xfrm.h>
 
+#include <ub/ub_net.h>
+#include <ub/ub_orphan.h>
+
 #ifdef CONFIG_SYSCTL
 #define SYNC_INIT 0 /* let the user enable it */
 #else
@@ -37,6 +40,8 @@
 
 int sysctl_tcp_tw_recycle;
 int sysctl_tcp_max_tw_buckets = NR_FILE*2;
+int sysctl_tcp_max_tw_kmem_fraction = 384;
+int sysctl_tcp_max_tw_buckets_ve = 16536;
 
 int sysctl_tcp_syncookies = SYNC_INIT; 
 int sysctl_tcp_abort_on_overflow;
@@ -74,7 +79,7 @@ static void tcp_timewait_kill(struct tcp
 	write_unlock(&ehead->lock);
 
 	/* Disassociate with bind bucket. */
-	bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num)];
+	bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num, TW_VEID(tw))];
 	spin_lock(&bhead->lock);
 	tb = tw->tw_tb;
 	__hlist_del(&tw->tw_bind_node);
@@ -123,17 +128,17 @@ enum tcp_tw_status
 tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
 			   struct tcphdr *th, unsigned len)
 {
-	struct tcp_opt tp;
+	struct tcp_options_received tmp_opt;
 	int paws_reject = 0;
 
-	tp.saw_tstamp = 0;
+	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(struct tcphdr) >> 2) && tw->tw_ts_recent_stamp) {
-		tcp_parse_options(skb, &tp, 0);
+		tcp_parse_options(skb, &tmp_opt, 0);
 
-		if (tp.saw_tstamp) {
-			tp.ts_recent	   = tw->tw_ts_recent;
-			tp.ts_recent_stamp = tw->tw_ts_recent_stamp;
-			paws_reject = tcp_paws_check(&tp, th->rst);
+		if (tmp_opt.saw_tstamp) {
+			tmp_opt.ts_recent	   = tw->tw_ts_recent;
+			tmp_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
+			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
 		}
 	}
 
@@ -150,33 +155,28 @@ tcp_timewait_state_process(struct tcp_tw
 		if (th->rst)
 			goto kill;
 
-		if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt))
-			goto kill_with_rst;
+		if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt)) {
+			tw->tw_substate = TCP_CLOSE;
+			tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+			return TCP_TW_RST;
+		}
 
 		/* Dup ACK? */
 		if (!after(TCP_SKB_CB(skb)->end_seq, tw->tw_rcv_nxt) ||
-		    TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
-			tcp_tw_put(tw);
+		    TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
 			return TCP_TW_SUCCESS;
-		}
 
-		/* New data or FIN. If new data arrive after half-duplex close,
-		 * reset.
-		 */
-		if (!th->fin ||
-		    TCP_SKB_CB(skb)->end_seq != tw->tw_rcv_nxt + 1) {
-kill_with_rst:
-			tcp_tw_deschedule(tw);
-			tcp_tw_put(tw);
-			return TCP_TW_RST;
-		}
-
-		/* FIN arrived, enter true time-wait state. */
-		tw->tw_substate	= TCP_TIME_WAIT;
-		tw->tw_rcv_nxt	= TCP_SKB_CB(skb)->end_seq;
-		if (tp.saw_tstamp) {
+		/* New data or FIN. */
+		if (th->fin && TCP_SKB_CB(skb)->end_seq == tw->tw_rcv_nxt + 1) {
+			/* FIN arrived, enter true time-wait state. */
+			tw->tw_substate = TCP_TIME_WAIT;
+			tw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		} else
+			/* If new data arrive after half-duplex close, reset. */
+			tw->tw_substate = TCP_CLOSE;
+		if (tmp_opt.saw_tstamp) {
 			tw->tw_ts_recent_stamp	= xtime.tv_sec;
-			tw->tw_ts_recent	= tp.rcv_tsval;
+			tw->tw_ts_recent	= tmp_opt.rcv_tsval;
 		}
 
 		/* I am shamed, but failed to make it more elegant.
@@ -190,7 +190,9 @@ kill_with_rst:
 			tcp_tw_schedule(tw, tw->tw_timeout);
 		else
 			tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
-		return TCP_TW_ACK;
+
+		return (tw->tw_substate == TCP_TIME_WAIT) ?
+			TCP_TW_ACK : TCP_TW_RST;
 	}
 
 	/*
@@ -223,18 +225,16 @@ kill_with_rst:
 			if (sysctl_tcp_rfc1337 == 0) {
 kill:
 				tcp_tw_deschedule(tw);
-				tcp_tw_put(tw);
 				return TCP_TW_SUCCESS;
 			}
 		}
 		tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
 
-		if (tp.saw_tstamp) {
-			tw->tw_ts_recent	= tp.rcv_tsval;
+		if (tmp_opt.saw_tstamp) {
+			tw->tw_ts_recent	= tmp_opt.rcv_tsval;
 			tw->tw_ts_recent_stamp	= xtime.tv_sec;
 		}
 
-		tcp_tw_put(tw);
 		return TCP_TW_SUCCESS;
 	}
 
@@ -257,7 +257,7 @@ kill:
 
 	if (th->syn && !th->rst && !th->ack && !paws_reject &&
 	    (after(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt) ||
-	     (tp.saw_tstamp && (s32)(tw->tw_ts_recent - tp.rcv_tsval) < 0))) {
+	     (tmp_opt.saw_tstamp && (s32)(tw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
 		u32 isn = tw->tw_snd_nxt + 65535 + 2;
 		if (isn == 0)
 			isn++;
@@ -268,7 +268,7 @@ kill:
 	if (paws_reject)
 		NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
 
-	if(!th->rst) {
+	if (!th->rst) {
 		/* In this case we must reset the TIMEWAIT timer.
 		 *
 		 * If it is ACKless SYN it may be both old duplicate
@@ -278,12 +278,9 @@ kill:
 		if (paws_reject || th->ack)
 			tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
 
-		/* Send ACK. Note, we do not put the bucket,
-		 * it will be released by caller.
-		 */
-		return TCP_TW_ACK;
+		return (tw->tw_substate == TCP_TIME_WAIT) ?
+			TCP_TW_ACK : TCP_TW_RST;
 	}
-	tcp_tw_put(tw);
 	return TCP_TW_SUCCESS;
 }
 
@@ -301,7 +298,8 @@ static void __tcp_tw_hashdance(struct so
 	   Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in
 	   binding cache, even if it is closed.
 	 */
-	bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
+	bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num,
+						VEID(VE_OWNER_SK(sk)))];
 	spin_lock(&bhead->lock);
 	tw->tw_tb = tcp_sk(sk)->bind_hash;
 	BUG_TRAP(tcp_sk(sk)->bind_hash);
@@ -329,12 +327,15 @@ void tcp_time_wait(struct sock *sk, int 
 	struct tcp_tw_bucket *tw = NULL;
 	struct tcp_opt *tp = tcp_sk(sk);
 	int recycle_ok = 0;
+	struct user_beancounter *ub;
 
-	if (sysctl_tcp_tw_recycle && tp->ts_recent_stamp)
+	if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp)
 		recycle_ok = tp->af_specific->remember_stamp(sk);
 
-	if (tcp_tw_count < sysctl_tcp_max_tw_buckets)
+	ub = set_sk_exec_ub(sk);
+	if (tcp_tw_count < sysctl_tcp_max_tw_buckets && ub_check_tw_count(sk))
 		tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC);
+	(void)set_exec_ub(ub);
 
 	if(tw != NULL) {
 		struct inet_opt *inet = inet_sk(sk);
@@ -351,16 +352,19 @@ void tcp_time_wait(struct sock *sk, int 
 		tw->tw_dport		= inet->dport;
 		tw->tw_family		= sk->sk_family;
 		tw->tw_reuse		= sk->sk_reuse;
-		tw->tw_rcv_wscale	= tp->rcv_wscale;
+		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale;
+		if (sk->sk_user_data != NULL)
+			tw->tw_rcv_wscale |= TW_WSCALE_SPEC;
 		atomic_set(&tw->tw_refcnt, 1);
 
 		tw->tw_hashent		= sk->sk_hashent;
 		tw->tw_rcv_nxt		= tp->rcv_nxt;
 		tw->tw_snd_nxt		= tp->snd_nxt;
 		tw->tw_rcv_wnd		= tcp_receive_window(tp);
-		tw->tw_ts_recent	= tp->ts_recent;
-		tw->tw_ts_recent_stamp	= tp->ts_recent_stamp;
+		tw->tw_ts_recent	= tp->rx_opt.ts_recent;
+		tw->tw_ts_recent_stamp	= tp->rx_opt.ts_recent_stamp;
 		tw_dead_node_init(tw);
+		spin_lock_init(&tw->tw_lock);
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		if (tw->tw_family == PF_INET6) {
@@ -375,6 +379,8 @@ void tcp_time_wait(struct sock *sk, int 
 			tw->tw_v6_ipv6only = 0;
 		}
 #endif
+		SET_TW_VEID(tw, VEID(VE_OWNER_SK(sk)));
+
 		/* Linkage updates. */
 		__tcp_tw_hashdance(sk, tw);
 
@@ -397,11 +403,19 @@ void tcp_time_wait(struct sock *sk, int 
 		 * socket up.  We've got bigger problems than
 		 * non-graceful socket closings.
 		 */
-		if (net_ratelimit())
-			printk(KERN_INFO "TCP: time wait bucket table overflow\n");
+		if (net_ratelimit()) {
+			int ubid = 0;
+#ifdef CONFIG_USER_RESOURCE
+			ubid = sock_has_ubc(sk) ? 
+				top_beancounter(sock_bc(sk)->ub)->ub_uid : 0;
+#endif
+			printk(KERN_INFO "TCP: time wait bucket table "
+			       "overflow (VE%d)\n", ubid);
+		}
 	}
 
-	tcp_update_metrics(sk);
+	if (state != TCP_CLOSE)
+		tcp_update_metrics(sk);
 	tcp_done(sk);
 }
 
@@ -442,6 +456,7 @@ static int tcp_do_twkill_work(int slot, 
 rescan:
 	tw_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) {
 		__tw_del_dead_node(tw);
+		ub_dec_tw_count(tw);
 		spin_unlock(&tw_death_lock);
 		tcp_timewait_kill(tw);
 		tcp_tw_put(tw);
@@ -534,6 +549,7 @@ void tcp_tw_deschedule(struct tcp_tw_buc
 {
 	spin_lock(&tw_death_lock);
 	if (tw_del_dead_node(tw)) {
+		ub_dec_tw_count(tw);
 		tcp_tw_put(tw);
 		if (--tcp_tw_count == 0)
 			del_timer(&tcp_tw_timer);
@@ -585,9 +601,10 @@ void tcp_tw_schedule(struct tcp_tw_bucke
 	spin_lock(&tw_death_lock);
 
 	/* Unlink it, if it was scheduled */
-	if (tw_del_dead_node(tw))
+	if (tw_del_dead_node(tw)) {
 		tcp_tw_count--;
-	else
+		ub_dec_tw_count(tw);
+	} else
 		atomic_inc(&tw->tw_refcnt);
 
 	if (slot >= TCP_TW_RECYCLE_SLOTS) {
@@ -620,6 +637,7 @@ void tcp_tw_schedule(struct tcp_tw_bucke
 
 	hlist_add_head(&tw->tw_death_node, list);
 
+	ub_inc_tw_count(tw);
 	if (tcp_tw_count++ == 0)
 		mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
 	spin_unlock(&tw_death_lock);
@@ -649,6 +667,7 @@ void tcp_twcal_tick(unsigned long dummy)
 					   &tcp_twcal_row[slot]) {
 				__tw_del_dead_node(tw);
 				tcp_timewait_kill(tw);
+				ub_dec_tw_count(tw);
 				tcp_tw_put(tw);
 				killed++;
 			}
@@ -694,6 +713,7 @@ struct sock *tcp_create_openreq_child(st
 		struct sk_filter *filter;
 
 		memcpy(newsk, sk, sizeof(struct tcp_sock));
+
 		newsk->sk_state = TCP_SYN_RECV;
 
 		/* SANITY */
@@ -703,6 +723,7 @@ struct sock *tcp_create_openreq_child(st
 		/* Clone the TCP header template */
 		inet_sk(newsk)->dport = req->rmt_port;
 
+		SET_VE_OWNER_SK(newsk, VE_OWNER_SK(sk));
 		sock_lock_init(newsk);
 		bh_lock_sock(newsk);
 
@@ -729,9 +750,14 @@ struct sock *tcp_create_openreq_child(st
 		if (sk->sk_create_child)
 			sk->sk_create_child(sk, newsk);
 
+		if (ub_tcp_sock_charge(newsk) < 0)
+			goto out_sk_free;
+
 		if (unlikely(xfrm_sk_clone_policy(newsk))) {
 			/* It is still raw copy of parent, so invalidate
 			 * destructor and make plain sk_free() */
+out_sk_free:
+			sock_reset_flag(newsk, SOCK_TIMESTAMP);
 			newsk->sk_destruct = NULL;
 			sk_free(newsk);
 			return NULL;
@@ -781,13 +807,13 @@ struct sock *tcp_create_openreq_child(st
 		newtp->pushed_seq = newtp->write_seq;
 		newtp->copied_seq = req->rcv_isn + 1;
 
-		newtp->saw_tstamp = 0;
+		newtp->rx_opt.saw_tstamp = 0;
 
-		newtp->dsack = 0;
-		newtp->eff_sacks = 0;
+		newtp->rx_opt.dsack = 0;
+		newtp->rx_opt.eff_sacks = 0;
 
 		newtp->probes_out = 0;
-		newtp->num_sacks = 0;
+		newtp->rx_opt.num_sacks = 0;
 		newtp->urg_data = 0;
 		newtp->listen_opt = NULL;
 		newtp->accept_queue = newtp->accept_queue_tail = NULL;
@@ -810,36 +836,36 @@ struct sock *tcp_create_openreq_child(st
 		newsk->sk_sleep = NULL;
 		newsk->sk_owner = NULL;
 
-		newtp->tstamp_ok = req->tstamp_ok;
-		if((newtp->sack_ok = req->sack_ok) != 0) {
+		newtp->rx_opt.tstamp_ok = req->tstamp_ok;
+		if((newtp->rx_opt.sack_ok = req->sack_ok) != 0) {
 			if (sysctl_tcp_fack)
-				newtp->sack_ok |= 2;
+				newtp->rx_opt.sack_ok |= 2;
 		}
 		newtp->window_clamp = req->window_clamp;
 		newtp->rcv_ssthresh = req->rcv_wnd;
 		newtp->rcv_wnd = req->rcv_wnd;
-		newtp->wscale_ok = req->wscale_ok;
-		if (newtp->wscale_ok) {
-			newtp->snd_wscale = req->snd_wscale;
-			newtp->rcv_wscale = req->rcv_wscale;
+		newtp->rx_opt.wscale_ok = req->wscale_ok;
+		if (newtp->rx_opt.wscale_ok) {
+			newtp->rx_opt.snd_wscale = req->snd_wscale;
+			newtp->rx_opt.rcv_wscale = req->rcv_wscale;
 		} else {
-			newtp->snd_wscale = newtp->rcv_wscale = 0;
+			newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
 			newtp->window_clamp = min(newtp->window_clamp, 65535U);
 		}
-		newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->snd_wscale;
+		newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale;
 		newtp->max_window = newtp->snd_wnd;
 
-		if (newtp->tstamp_ok) {
-			newtp->ts_recent = req->ts_recent;
-			newtp->ts_recent_stamp = xtime.tv_sec;
+		if (newtp->rx_opt.tstamp_ok) {
+			newtp->rx_opt.ts_recent = req->ts_recent;
+			newtp->rx_opt.ts_recent_stamp = xtime.tv_sec;
 			newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
 		} else {
-			newtp->ts_recent_stamp = 0;
+			newtp->rx_opt.ts_recent_stamp = 0;
 			newtp->tcp_header_len = sizeof(struct tcphdr);
 		}
 		if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
 			newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len;
-		newtp->mss_clamp = req->mss;
+		newtp->rx_opt.mss_clamp = req->mss;
 		TCP_ECN_openreq_child(newtp, req);
 		if (newtp->ecn_flags&TCP_ECN_OK)
 			newsk->sk_no_largesend = 1;
@@ -864,21 +890,21 @@ struct sock *tcp_check_req(struct sock *
 	struct tcp_opt *tp = tcp_sk(sk);
 	u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	int paws_reject = 0;
-	struct tcp_opt ttp;
+	struct tcp_options_received tmp_opt;
 	struct sock *child;
 
-	ttp.saw_tstamp = 0;
+	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(struct tcphdr)>>2)) {
-		tcp_parse_options(skb, &ttp, 0);
+		tcp_parse_options(skb, &tmp_opt, 0);
 
-		if (ttp.saw_tstamp) {
-			ttp.ts_recent = req->ts_recent;
+		if (tmp_opt.saw_tstamp) {
+			tmp_opt.ts_recent = req->ts_recent;
 			/* We do not store true stamp, but it is not required,
 			 * it can be estimated (approximately)
 			 * from another data.
 			 */
-			ttp.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
-			paws_reject = tcp_paws_check(&ttp, th->rst);
+			tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
+			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
 		}
 	}
 
@@ -983,63 +1009,63 @@ struct sock *tcp_check_req(struct sock *
 
 	/* In sequence, PAWS is OK. */
 
-	if (ttp.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
-		req->ts_recent = ttp.rcv_tsval;
+	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
+			req->ts_recent = tmp_opt.rcv_tsval;
 
-	if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
-		/* Truncate SYN, it is out of window starting
-		   at req->rcv_isn+1. */
-		flg &= ~TCP_FLAG_SYN;
-	}
+		if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
+			/* Truncate SYN, it is out of window starting
+			   at req->rcv_isn+1. */
+			flg &= ~TCP_FLAG_SYN;
+		}
 
-	/* RFC793: "second check the RST bit" and
-	 *	   "fourth, check the SYN bit"
-	 */
-	if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
-		goto embryonic_reset;
+		/* RFC793: "second check the RST bit" and
+		 *	   "fourth, check the SYN bit"
+		 */
+		if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
+			goto embryonic_reset;
 
-	/* ACK sequence verified above, just make sure ACK is
-	 * set.  If ACK not set, just silently drop the packet.
-	 */
-	if (!(flg & TCP_FLAG_ACK))
-		return NULL;
+		/* ACK sequence verified above, just make sure ACK is
+		 * set.  If ACK not set, just silently drop the packet.
+		 */
+		if (!(flg & TCP_FLAG_ACK))
+			return NULL;
 
-	/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
-	if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
-		req->acked = 1;
-		return NULL;
-	}
+		/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
+		if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
+			req->acked = 1;
+			return NULL;
+		}
 
-	/* OK, ACK is valid, create big socket and
-	 * feed this segment to it. It will repeat all
-	 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
-	 * ESTABLISHED STATE. If it will be dropped after
-	 * socket is created, wait for troubles.
-	 */
-	child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
-	if (child == NULL)
-		goto listen_overflow;
-
-	sk_set_owner(child, sk->sk_owner);
-	tcp_synq_unlink(tp, req, prev);
-	tcp_synq_removed(sk, req);
-
-	tcp_acceptq_queue(sk, req, child);
-	return child;
-
-listen_overflow:
-	if (!sysctl_tcp_abort_on_overflow) {
-		req->acked = 1;
-		return NULL;
-	}
+		/* OK, ACK is valid, create big socket and
+		 * feed this segment to it. It will repeat all
+		 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
+		 * ESTABLISHED STATE. If it will be dropped after
+		 * socket is created, wait for troubles.
+		 */
+		child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
+		if (child == NULL)
+			goto listen_overflow;
+
+		sk_set_owner(child, sk->sk_owner);
+		tcp_synq_unlink(tp, req, prev);
+		tcp_synq_removed(sk, req);
+
+		tcp_acceptq_queue(sk, req, child);
+		return child;
+
+	listen_overflow:
+		if (!sysctl_tcp_abort_on_overflow) {
+			req->acked = 1;
+			return NULL;
+		}
 
-embryonic_reset:
-	NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
-	if (!(flg & TCP_FLAG_RST))
-		req->class->send_reset(skb);
+	embryonic_reset:
+		NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
+		if (!(flg & TCP_FLAG_RST))
+			req->class->send_reset(skb);
 
-	tcp_synq_drop(sk, req, prev);
-	return NULL;
+		tcp_synq_drop(sk, req, prev);
+		return NULL;
 }
 
 /*
diff -Nurap linux-2.6.9-100.orig/net/ipv4/tcp_output.c linux-2.6.9-ve023stab054/net/ipv4/tcp_output.c
--- linux-2.6.9-100.orig/net/ipv4/tcp_output.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/tcp_output.c	2011-06-15 19:26:20.000000000 +0400
@@ -42,6 +42,9 @@
 #include <linux/module.h>
 #include <linux/smp_lock.h>
 
+#include <ub/ub_net.h>
+#include <ub/ub_tcp.h>
+
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse = 1;
 
@@ -252,13 +255,13 @@ static __inline__ u16 tcp_select_window(
 	/* Make sure we do not exceed the maximum possible
 	 * scaled window.
 	 */
-	if (!tp->rcv_wscale && sysctl_tcp_workaround_signed_windows)
+	if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
 		new_win = min(new_win, MAX_TCP_WINDOW);
 	else
-		new_win = min(new_win, (65535U << tp->rcv_wscale));
+		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
 
 	/* RFC1323 scaling applied */
-	new_win >>= tp->rcv_wscale;
+	new_win >>= tp->rx_opt.rcv_wscale;
 
 	/* If we advertise zero window, disable fast path. */
 	if (new_win == 0)
@@ -268,6 +271,13 @@ static __inline__ u16 tcp_select_window(
 }
 
 
+static int skb_header_size(struct sock *sk, int tcp_hlen)
+{
+	struct ip_options *opt = inet_sk(sk)->opt;
+	return tcp_hlen + sizeof(struct iphdr) +
+		(opt ? opt->optlen : 0)	+ ETH_HLEN /* For hard header */;
+}
+
 /* This routine actually transmits TCP packets queued in by
  * tcp_do_sendmsg().  This is used by both the initial
  * transmission and possible later retransmissions.
@@ -286,6 +296,7 @@ static int tcp_transmit_skb(struct sock 
 		struct tcp_opt *tp = tcp_sk(sk);
 		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 		int tcp_header_size = tp->tcp_header_len;
+		int header_size;
 		struct tcphdr *th;
 		int sysctl_flags;
 		int err;
@@ -312,14 +323,28 @@ static int tcp_transmit_skb(struct sock 
 				if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
 					tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
 			}
-		} else if (tp->eff_sacks) {
+		} else if (tp->rx_opt.eff_sacks) {
 			/* A SACK is 2 pad bytes, a 2 byte header, plus
 			 * 2 32-bit sequence numbers for each SACK block.
 			 */
 			tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
-					    (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
+					    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
 		}
-		
+
+		/* Unfortunately, we can have skb from outside world here
+		 * with size insufficient for header. It is impossible to make
+		 * guess when we queue skb, so the decision should be made
+		 * here. Den
+		 */
+		header_size = skb_header_size(sk, tcp_header_size);
+		if (skb->data - header_size < skb->head) {
+			int delta = header_size - skb_headroom(skb);
+			err = pskb_expand_head(skb, SKB_DATA_ALIGN(delta),
+					0, GFP_ATOMIC);
+			if (err)
+				return err;
+		}
+
 		/*
 		 * If the connection is idle and we are restarting,
 		 * then we don't want to do any Vegas calculations
@@ -365,9 +390,9 @@ static int tcp_transmit_skb(struct sock 
 					      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
 					      (sysctl_flags & SYSCTL_FLAG_SACK),
 					      (sysctl_flags & SYSCTL_FLAG_WSCALE),
-					      tp->rcv_wscale,
+					      tp->rx_opt.rcv_wscale,
 					      tcb->when,
-		      			      tp->ts_recent);
+		      			      tp->rx_opt.ts_recent);
 		} else {
 			tcp_build_and_update_options((__u32 *)(th + 1),
 						     tp, tcb->when);
@@ -482,15 +507,23 @@ static int tcp_fragment(struct sock *sk,
 	if (nsize < 0)
 		nsize = 0;
 
-	if (skb_cloned(skb) &&
-	    skb_is_nonlinear(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-		return -ENOMEM;
+	if (skb_cloned(skb) && skb_is_nonlinear(skb)) {
+		unsigned long chargesize;
+		chargesize = skb_bc(skb)->charged;
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+			return -ENOMEM;
+		ub_sock_tcp_unchargesend(sk, chargesize);
+		ub_tcpsndbuf_charge_forced(sk, skb);
+	}
 
 	/* Get a new skb... force flag on. */
 	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
 	if (buff == NULL)
 		return -ENOMEM; /* We'll just try again later. */
+	if (ub_tcpsndbuf_charge(sk, buff) < 0) {
+		kfree_skb(buff);
+		return -ENOMEM;
+	}
 	sk_charge_skb(sk, buff);
 
 	/* Correct the sequence numbers. */
@@ -616,10 +649,10 @@ int tcp_trim_head(struct sock *sk, struc
 
 /* This function synchronize snd mss to current pmtu/exthdr set.
 
-   tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
+   tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
    for TCP options, but includes only bare TCP header.
 
-   tp->mss_clamp is mss negotiated at connection setup.
+   tp->rx_opt.mss_clamp is mss negotiated at connection setup.
    It is minumum of user_mss and mss received with SYN.
    It also does not include TCP options.
 
@@ -628,7 +661,7 @@ int tcp_trim_head(struct sock *sk, struc
    tp->mss_cache is current effective sending mss, including
    all tcp options except for SACKs. It is evaluated,
    taking into account current pmtu, but never exceeds
-   tp->mss_clamp.
+   tp->rx_opt.mss_clamp.
 
    NOTE1. rfc1122 clearly states that advertised MSS
    DOES NOT include either tcp or ip options.
@@ -652,8 +685,8 @@ unsigned int tcp_sync_mss(struct sock *s
 	mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);
 
 	/* Clamp it (mss_clamp does not include tcp options) */
-	if (mss_now > tp->mss_clamp)
-		mss_now = tp->mss_clamp;
+	if (mss_now > tp->rx_opt.mss_clamp)
+		mss_now = tp->rx_opt.mss_clamp;
 
 	/* Now subtract optional transport overhead */
 	mss_now -= tp->ext_header_len + tp->ext2_header_len;
@@ -732,9 +765,9 @@ unsigned int tcp_current_mss(struct sock
 		mss_now = tp->mss_cache;
 	}
 
-	if (tp->eff_sacks)
+	if (tp->rx_opt.eff_sacks)
 		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
-			    (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
+			    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
 	return mss_now;
 }
 EXPORT_SYMBOL_GPL(tcp_current_mss);
@@ -870,7 +903,7 @@ u32 __tcp_select_window(struct sock *sk)
 	if (free_space < full_space/2) {
 		tp->ack.quick = 0;
 
-		if (tcp_memory_pressure)
+		if (ub_tcp_shrink_rcvbuf(sk))
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
 
 		if (free_space < mss)
@@ -884,16 +917,16 @@ u32 __tcp_select_window(struct sock *sk)
 	 * scaled window will not line up with the MSS boundary anyway.
 	 */
 	window = tp->rcv_wnd;
-	if (tp->rcv_wscale) {
+	if (tp->rx_opt.rcv_wscale) {
 		window = free_space;
 
 		/* Advertise enough space so that it won't get scaled away.
 		 * Import case: prevent zero window announcement if
 		 * 1<<rcv_wscale > mss.
 		 */
-		if (((window >> tp->rcv_wscale) << tp->rcv_wscale) != window)
-			window = (((window >> tp->rcv_wscale) + 1)
-				  << tp->rcv_wscale);
+		if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
+			window = (((window >> tp->rx_opt.rcv_wscale) + 1)
+				  << tp->rx_opt.rcv_wscale);
 	} else {
 		/* Get the largest window that is a nice multiple of mss.
 		 * Window clamp already applied above.
@@ -971,7 +1004,7 @@ static void tcp_retrans_try_collapse(str
 			tcp_dec_pcount(&tp->left_out, next_skb);
 		}
 		/* Reno case is special. Sigh... */
-		if (!tp->sack_ok && tcp_get_pcount(&tp->sacked_out)) {
+		if (!tp->rx_opt.sack_ok && tcp_get_pcount(&tp->sacked_out)) {
 			tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
 			tcp_dec_pcount(&tp->left_out, next_skb);
 		}
@@ -1211,7 +1244,7 @@ void tcp_xmit_retransmit_queue(struct so
 		return;
 
 	/* No forward retransmissions in Reno are possible. */
-	if (!tp->sack_ok)
+	if (!tp->rx_opt.sack_ok)
 		return;
 
 	/* Yeah, we have to make difficult choice between forward transmission
@@ -1281,6 +1314,7 @@ void tcp_send_fin(struct sock *sk)
 				break;
 			yield();
 		}
+		ub_tcpsndbuf_charge_forced(sk, skb);
 
 		/* Reserve space for headers and prepare control bits. */
 		skb_reserve(skb, MAX_TCP_HEADER);
@@ -1350,6 +1384,10 @@ int tcp_send_synack(struct sock *sk)
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			if (nskb == NULL)
 				return -ENOMEM;
+			if (ub_tcpsndbuf_charge(sk, skb) < 0) {
+				kfree_skb(nskb);
+				return -ENOMEM;
+			}
 			__skb_unlink(skb, &sk->sk_write_queue);
 			__skb_queue_head(&sk->sk_write_queue, nskb);
 			sk_stream_free_skb(sk, skb);
@@ -1450,23 +1488,38 @@ static inline void tcp_connect_init(stru
 		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
 
 	/* If user gave his TCP_MAXSEG, record it to clamp */
-	if (tp->user_mss)
-		tp->mss_clamp = tp->user_mss;
+	if (tp->rx_opt.user_mss)
+		tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
 	tp->max_window = 0;
 	tcp_sync_mss(sk, dst_pmtu(dst));
 
+	if (tp->advmss == 0 || dst_metric(dst, RTAX_ADVMSS) == 0) {
+		printk("Oops in connect_init! tp->advmss=%d, dst->advmss=%d\n",
+				tp->advmss, dst_metric(dst, RTAX_ADVMSS));
+		printk("dst: pmtu=%u, advmss=%u\n",
+				dst_metric(dst, RTAX_MTU),
+				dst_metric(dst, RTAX_ADVMSS));
+		printk("sk->state=%d, tp: ack.rcv_mss=%d, mss_cache=%d, "
+				"advmss=%d, user_mss=%d\n",
+				sk->sk_state, tp->ack.rcv_mss, tp->mss_cache,
+				tp->advmss, tp->rx_opt.user_mss);
+	}
+
 	if (!tp->window_clamp)
 		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
-	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	if (dst_metric(dst, RTAX_ADVMSS) < tp->advmss)
+		tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	if (tp->advmss == 0)
+		tp->advmss = 1460;
 	tcp_initialize_rcv_mss(sk);
 	tcp_ca_init(tp);
 
 	tcp_select_initial_window(tcp_full_space(sk),
-				  tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
+				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
 				  sysctl_tcp_window_scaling,
-				  &tp->rcv_wscale);
+				  &tp->rx_opt.rcv_wscale);
 
 	tp->rcv_ssthresh = tp->rcv_wnd;
 
@@ -1498,6 +1551,10 @@ int tcp_connect(struct sock *sk)
 	buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
 	if (unlikely(buff == NULL))
 		return -ENOBUFS;
+	if (ub_tcpsndbuf_charge(sk, buff) < 0) {
+		kfree_skb(buff);
+		return -ENOBUFS;
+	}
 
 	/* Reserve space for headers. */
 	skb_reserve(buff, MAX_TCP_HEADER);
@@ -1749,3 +1806,4 @@ EXPORT_SYMBOL(tcp_make_synack);
 EXPORT_SYMBOL(tcp_simple_retransmit);
 EXPORT_SYMBOL(tcp_sync_mss);
 EXPORT_SYMBOL_GPL(tcp_write_xmit);
+EXPORT_SYMBOL_GPL(tcp_transmit_skb);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/tcp_timer.c linux-2.6.9-ve023stab054/net/ipv4/tcp_timer.c
--- linux-2.6.9-100.orig/net/ipv4/tcp_timer.c	2004-10-19 01:55:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/tcp_timer.c	2011-06-15 19:26:19.000000000 +0400
@@ -22,6 +22,8 @@
 
 #include <linux/module.h>
 #include <net/tcp.h>
+#include <ub/ub_orphan.h>
+#include <ub/ub_tcp.h>
 
 int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 
 int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; 
@@ -100,9 +102,10 @@ static void tcp_write_err(struct sock *s
 static int tcp_out_of_resources(struct sock *sk, int do_reset)
 {
 	struct tcp_opt *tp = tcp_sk(sk);
-	int orphans = atomic_read(&tcp_orphan_count);
+	int orphans = tcp_get_orphan_count(sk);
+	int orph = orphans;
 
-	/* If peer does not open window for long time, or did not transmit 
+	/* If peer does not open window for long time, or did not transmit
 	 * anything for long time, penalize it. */
 	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
 		orphans <<= 1;
@@ -111,12 +114,15 @@ static int tcp_out_of_resources(struct s
 	if (sk->sk_err_soft)
 		orphans <<= 1;
 
-	if (orphans >= sysctl_tcp_max_orphans ||
-	    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-	     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
-		if (net_ratelimit())
-			printk(KERN_INFO "Out of socket memory\n");
-
+	if (tcp_too_many_orphans(sk, orphans)) {
+		if (net_ratelimit()) {
+			int ubid = 0;
+#ifdef CONFIG_USER_RESOURCE
+			ubid = sock_has_ubc(sk) ? sock_bc(sk)->ub->ub_uid : 0;
+#endif
+			printk(KERN_INFO "Orphaned socket dropped "
+			       "(%d,%d in VE%d)\n", orph, orphans, ubid);
+		}
 		/* Catch exceptional cases, when connection requires reset.
 		 *      1. Last segment was sent recently. */
 		if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
@@ -206,6 +212,7 @@ static int tcp_write_timeout(struct sock
 static void tcp_delack_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock*)data;
+	struct ve_struct *env = set_exec_env(VE_OWNER_SK(sk));
 	struct tcp_opt *tp = tcp_sk(sk);
 
 	bh_lock_sock(sk);
@@ -257,11 +264,12 @@ static void tcp_delack_timer(unsigned lo
 	TCP_CHECK_TIMER(sk);
 
 out:
-	if (tcp_memory_pressure)
+	if (ub_tcp_memory_pressure(sk))
 		sk_stream_mem_reclaim(sk);
 out_unlock:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	(void)set_exec_env(env);
 }
 
 static void tcp_probe_timer(struct sock *sk)
@@ -315,6 +323,9 @@ static void tcp_probe_timer(struct sock 
 static void tcp_retransmit_timer(struct sock *sk)
 {
 	struct tcp_opt *tp = tcp_sk(sk);
+	struct ve_struct *ve_old;
+
+	ve_old = set_exec_env(VE_OWNER_SK(sk));
 
 	if (!tcp_get_pcount(&tp->packets_out))
 		goto out;
@@ -351,7 +362,7 @@ static void tcp_retransmit_timer(struct 
 
 	if (tp->retransmits == 0) {
 		if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
-			if (tp->sack_ok) {
+			if (tp->rx_opt.sack_ok) {
 				if (tp->ca_state == TCP_CA_Recovery)
 					NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
 				else
@@ -410,12 +421,14 @@ out_reset_timer:
 	if (tp->retransmits > sysctl_tcp_retries1)
 		__sk_dst_reset(sk);
 
-out:;
+out:
+	(void)set_exec_env(ve_old);
 }
 
 static void tcp_write_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock*)data;
+	struct ve_struct *env = set_exec_env(VE_OWNER_SK(sk));
 	struct tcp_opt *tp = tcp_sk(sk);
 	int event;
 
@@ -452,6 +465,7 @@ out:
 out_unlock:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	(void)set_exec_env(env);
 }
 
 /*
@@ -571,6 +585,7 @@ void tcp_set_keepalive(struct sock *sk, 
 static void tcp_keepalive_timer (unsigned long data)
 {
 	struct sock *sk = (struct sock *) data;
+	struct ve_struct *env = set_exec_env(VE_OWNER_SK(sk));
 	struct tcp_opt *tp = tcp_sk(sk);
 	__u32 elapsed;
 
@@ -645,6 +660,7 @@ death:	
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	(void)set_exec_env(env);
 }
 
 EXPORT_SYMBOL(tcp_clear_xmit_timers);
diff -Nurap linux-2.6.9-100.orig/net/ipv4/udp.c linux-2.6.9-ve023stab054/net/ipv4/udp.c
--- linux-2.6.9-100.orig/net/ipv4/udp.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv4/udp.c	2011-06-15 19:26:19.000000000 +0400
@@ -125,7 +125,9 @@ static int udp_v4_get_port(struct sock *
 	struct hlist_node *node;
 	struct sock *sk2;
 	struct inet_opt *inet = inet_sk(sk);
+	struct ve_struct *env;
 
+	env = VE_OWNER_SK(sk);
 	write_lock_bh(&udp_hash_lock);
 	if (!snum) {
 		int i, remaining;
@@ -137,7 +139,7 @@ static int udp_v4_get_port(struct sock *
 		best_size_so_far = UINT_MAX;
 		best = rover = net_random() % remaining + low;
 
-		if (!udp_lport_inuse(rover))
+		if (!udp_lport_inuse(rover, env))
 			goto gotit;
 
 		/* 1st pass: look for empty (or shortest) hash chain */
@@ -145,7 +147,7 @@ static int udp_v4_get_port(struct sock *
 			struct hlist_head *list;
 			int size = 0;
 
-			list = &udp_hash[rover & (UDP_HTABLE_SIZE - 1)];
+			list = &udp_hash[udp_hashfn(rover, VEID(env))];
 			if (hlist_empty(list))
 				goto gotit;
 
@@ -163,7 +165,7 @@ static int udp_v4_get_port(struct sock *
 		/* 2nd pass: find hole in shortest hash chain */
 		rover = best;
 		for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
-			if (!udp_lport_inuse(rover))
+			if (!udp_lport_inuse(rover, env))
 				goto gotit;
 			rover += UDP_HTABLE_SIZE;
 			if (rover > high)
@@ -176,11 +178,12 @@ gotit:
 		udp_port_rover = snum = rover;
 	} else {
 		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
+			    &udp_hash[udp_hashfn(snum, VEID(env))]) {
 			struct inet_opt *inet2 = inet_sk(sk2);
 
 			if (inet2->num == snum &&
 			    sk2 != sk &&
+			    ve_accessible_strict(VE_OWNER_SK(sk2), env) &&
 			    !ipv6_only_sock(sk2) &&
 			    (!sk2->sk_bound_dev_if ||
 			     !sk->sk_bound_dev_if ||
@@ -194,7 +197,7 @@ gotit:
 	}
 	inet->num = snum;
 	if (sk_unhashed(sk)) {
-		struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+		struct hlist_head *h = &udp_hash[udp_hashfn(snum, VEID(env))];
 
 		sk_add_node(sk, h);
 		sock_prot_inc_use(sk->sk_prot);
@@ -231,11 +234,15 @@ struct sock *udp_v4_lookup_longway(u32 s
 	struct hlist_node *node;
 	unsigned short hnum = ntohs(dport);
 	int badness = -1;
+	struct ve_struct *env;
 
-	sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
+	env = get_exec_env();
+	sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) {
 		struct inet_opt *inet = inet_sk(sk);
 
-		if (inet->num == hnum && !ipv6_only_sock(sk)) {
+		if (inet->num == hnum &&
+		    ve_accessible_strict(VE_OWNER_SK(sk), env) &&
+		    !ipv6_only_sock(sk)) {
 			int score = (sk->sk_family == PF_INET ? 1 : 0);
 			if (inet->rcv_saddr) {
 				if (inet->rcv_saddr != daddr)
@@ -1064,7 +1071,8 @@ static int udp_v4_mcast_deliver(struct s
 	int dif;
 
 	read_lock(&udp_hash_lock);
-	sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+	sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest),
+				VEID(VE_OWNER_SKB(skb)))]);
 	dif = skb->dev->ifindex;
 	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (sk) {
@@ -1386,10 +1394,14 @@ static struct sock *udp_get_first(struct
 {
 	struct sock *sk;
 	struct udp_iter_state *state = seq->private;
+	struct ve_struct *env;
 
+	env = get_exec_env();
 	for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
 		struct hlist_node *node;
 		sk_for_each(sk, node, &udp_hash[state->bucket]) {
+			if (!ve_accessible(VE_OWNER_SK(sk), env))
+				continue;
 			if (sk->sk_family == state->family)
 				goto found;
 		}
@@ -1406,8 +1418,13 @@ static struct sock *udp_get_next(struct 
 	do {
 		sk = sk_next(sk);
 try_again:
-		;
-	} while (sk && sk->sk_family != state->family);
+		if (!sk)
+			break;
+		if (sk->sk_family != state->family)
+			continue;
+		if (ve_accessible(VE_OWNER_SK(sk), get_exec_env()))
+			break;
+	} while (1);
 
 	if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
 		sk = sk_head(&udp_hash[state->bucket]);
diff -Nurap linux-2.6.9-100.orig/net/ipv6/addrconf.c linux-2.6.9-ve023stab054/net/ipv6/addrconf.c
--- linux-2.6.9-100.orig/net/ipv6/addrconf.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv6/addrconf.c	2011-06-15 19:26:19.000000000 +0400
@@ -1978,6 +1978,10 @@ static int addrconf_notify(struct notifi
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	int run_pending = 0;
 
+	/* not virtualized yet */
+	if (!ve_is_super(get_exec_env()))
+		return NOTIFY_OK;
+
 	switch(event) {
 	case NETDEV_UP:
 	case NETDEV_CHANGE:
diff -Nurap linux-2.6.9-100.orig/net/ipv6/ndisc.c linux-2.6.9-ve023stab054/net/ipv6/ndisc.c
--- linux-2.6.9-100.orig/net/ipv6/ndisc.c	2011-06-09 19:22:58.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv6/ndisc.c	2011-06-15 19:26:20.000000000 +0400
@@ -124,7 +124,7 @@ static struct neigh_ops ndisc_direct_ops
 	.queue_xmit =		dev_queue_xmit,
 };
 
-struct neigh_table nd_tbl = {
+struct neigh_table global_nd_tbl = {
 	.family =	AF_INET6,
 	.entry_size =	sizeof(struct neighbour) + sizeof(struct in6_addr),
 	.key_len =	sizeof(struct in6_addr),
@@ -135,7 +135,7 @@ struct neigh_table nd_tbl = {
 	.proxy_redo =	pndisc_redo,
 	.id =		"ndisc_cache",
 	.parms = {
-		.tbl =			&nd_tbl,
+		.tbl =			&global_nd_tbl,
 		.base_reachable_time =	30 * HZ,
 		.retrans_time =	 1 * HZ,
 		.gc_staletime =	60 * HZ,
@@ -1529,7 +1529,9 @@ int __init ndisc_init(struct net_proto_f
          * Initialize the neighbour table
          */
 	
-	neigh_table_init(&nd_tbl);
+	get_ve0()->ve_nd_tbl = &global_nd_tbl;
+	if (neigh_table_init(&nd_tbl))
+		panic("cannot initialize IPv6 NDISC tables\n");
 
 #ifdef CONFIG_SYSCTL
 	neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, 
@@ -1549,3 +1551,48 @@ void ndisc_cleanup(void)
 	sock_release(ndisc_socket);
 	ndisc_socket = NULL; /* For safety. */
 }
+
+int ve_ndisc_init(struct ve_struct *ve)
+{
+	struct ve_struct *old_env;
+	int err;
+
+	ve->ve_nd_tbl = kmalloc(sizeof(struct neigh_table), GFP_KERNEL);
+	if (ve->ve_nd_tbl == NULL)
+		return -ENOMEM;
+
+	*(ve->ve_nd_tbl) = global_nd_tbl;
+	ve->ve_nd_tbl->parms.tbl = ve->ve_nd_tbl;
+	old_env = set_exec_env(ve);
+	err = neigh_table_init(ve->ve_nd_tbl);
+	if (err)
+		goto out_free;
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, 
+			      "ipv6", &ndisc_ifinfo_sysctl_change);
+#endif
+	err = 0;
+
+out:
+	set_exec_env(old_env);
+	return err;
+
+out_free:
+	kfree(ve->ve_nd_tbl);
+	ve->ve_nd_tbl = NULL;
+	goto out;
+}
+EXPORT_SYMBOL(ve_ndisc_init);
+
+void ve_ndisc_fini(struct ve_struct *ve)
+{
+	if (ve->ve_nd_tbl) {
+#ifdef CONFIG_SYSCTL
+		neigh_sysctl_unregister(&ve->ve_nd_tbl->parms);
+#endif
+		neigh_table_clear(ve->ve_nd_tbl);
+		kfree(ve->ve_nd_tbl);
+		ve->ve_nd_tbl = NULL;
+	}
+}
+EXPORT_SYMBOL(ve_ndisc_fini);
diff -Nurap linux-2.6.9-100.orig/net/ipv6/netfilter/ip6t_owner.c linux-2.6.9-ve023stab054/net/ipv6/netfilter/ip6t_owner.c
--- linux-2.6.9-100.orig/net/ipv6/netfilter/ip6t_owner.c	2004-10-19 01:55:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv6/netfilter/ip6t_owner.c	2011-06-15 19:26:19.000000000 +0400
@@ -23,6 +23,7 @@ MODULE_LICENSE("GPL");
 static int
 match_pid(const struct sk_buff *skb, pid_t pid)
 {
+#ifndef CONFIG_VE
 	struct task_struct *p;
 	struct files_struct *files;
 	int i;
@@ -48,12 +49,14 @@ match_pid(const struct sk_buff *skb, pid
 	task_unlock(p);
 out:
 	read_unlock(&tasklist_lock);
+#endif
 	return 0;
 }
 
 static int
 match_sid(const struct sk_buff *skb, pid_t sid)
 {
+#ifndef CONFIG_VE
 	struct task_struct *g, *p;
 	struct file *file = skb->sk->sk_socket->file;
 	int i, found=0;
@@ -84,6 +87,9 @@ out:
 	read_unlock(&tasklist_lock);
 
 	return found;
+#else
+	return 0;
+#endif
 }
 
 static int
diff -Nurap linux-2.6.9-100.orig/net/ipv6/tcp_ipv6.c linux-2.6.9-ve023stab054/net/ipv6/tcp_ipv6.c
--- linux-2.6.9-100.orig/net/ipv6/tcp_ipv6.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv6/tcp_ipv6.c	2011-06-15 19:26:19.000000000 +0400
@@ -143,7 +143,7 @@ static int tcp_v6_get_port(struct sock *
 		do {	rover++;
 			if ((rover < low) || (rover > high))
 				rover = low;
-			head = &tcp_bhash[tcp_bhashfn(rover)];
+			head = &tcp_bhash[tcp_bhashfn(rover, 0)];
 			spin_lock(&head->lock);
 			tb_for_each(tb, node, &head->chain)
 				if (tb->port == rover)
@@ -163,7 +163,7 @@ static int tcp_v6_get_port(struct sock *
 		/* OK, here is the one we will use. */
 		snum = rover;
 	} else {
-		head = &tcp_bhash[tcp_bhashfn(snum)];
+		head = &tcp_bhash[tcp_bhashfn(snum, 0)];
 		spin_lock(&head->lock);
 		tb_for_each(tb, node, &head->chain)
 			if (tb->port == snum)
@@ -184,7 +184,7 @@ tb_found:
 	}
 tb_not_found:
 	ret = 1;
-	if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
+	if (!tb && (tb = tcp_bucket_create(head, snum, NULL)) == NULL)
 		goto fail_unlock;
 	if (hlist_empty(&tb->owners)) {
 		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
@@ -256,7 +256,7 @@ static struct sock *tcp_v6_lookup_listen
 
 	hiscore=0;
 	read_lock(&tcp_lhash_lock);
-	sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
+	sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum, 0)]) {
 		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 			
@@ -471,8 +471,8 @@ static int tcp_v6_check_established(stru
 				tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
 				if (!tp->write_seq)
 					tp->write_seq = 1;
-				tp->ts_recent = tw->tw_ts_recent;
-				tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
+				tp->rx_opt.ts_recent = tw->tw_ts_recent;
+				tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
 				sock_hold(sk2);
 				goto unique;
 			} else
@@ -523,7 +523,7 @@ static int tcp_v6_hash_connect(struct so
 		inet_sk(sk)->sport = htons(inet_sk(sk)->num);
 	}
 
-	head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
+	head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num, 0)];
 	tb = tb_head(head);
 
 	spin_lock_bh(&head->lock);
@@ -607,10 +607,10 @@ static int tcp_v6_connect(struct sock *s
 			return -EINVAL;
 	}
 
-	if (tp->ts_recent_stamp &&
+	if (tp->rx_opt.ts_recent_stamp &&
 	    ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
-		tp->ts_recent = 0;
-		tp->ts_recent_stamp = 0;
+		tp->rx_opt.ts_recent = 0;
+		tp->rx_opt.ts_recent_stamp = 0;
 		tp->write_seq = 0;
 	}
 
@@ -695,13 +695,15 @@ static int tcp_v6_connect(struct sock *s
 	ip6_dst_store(sk, dst, NULL);
 	sk->sk_route_caps = dst->dev->features &
 		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+ 	if (!sysctl_tcp_use_sg)
+ 		sk->sk_route_caps &= ~NETIF_F_SG;
 
 	tp->ext_header_len = 0;
 	if (np->opt)
 		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
 	tp->ext2_header_len = dst->header_len;
 
-	tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
+	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 
 	inet->dport = usin->sin6_port;
 
@@ -1200,7 +1202,8 @@ static void tcp_v6_synq_add(struct sock 
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct tcp_opt tmptp, *tp = tcp_sk(sk);
+	struct tcp_options_received tmp_opt;
+	struct tcp_opt *tp = tcp_sk(sk);
 	struct open_request *req = NULL;
 	__u32 isn = TCP_SKB_CB(skb)->when;
 
@@ -1226,14 +1229,14 @@ static int tcp_v6_conn_request(struct so
 	if (req == NULL)
 		goto drop;
 
-	tcp_clear_options(&tmptp);
-	tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
-	tmptp.user_mss = tp->user_mss;
+	tcp_clear_options(&tmp_opt);
+	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
+	tmp_opt.user_mss = tp->rx_opt.user_mss;
 
-	tcp_parse_options(skb, &tmptp, 0);
+	tcp_parse_options(skb, &tmp_opt, 0);
 
-	tmptp.tstamp_ok = tmptp.saw_tstamp;
-	tcp_openreq_init(req, &tmptp, skb);
+	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
+	tcp_openreq_init(req, &tmp_opt, skb);
 
 	req->class = &or_ipv6;
 	ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
@@ -1386,6 +1389,8 @@ static struct sock * tcp_v6_syn_recv_soc
 	ip6_dst_store(newsk, dst, NULL);
 	newsk->sk_route_caps = dst->dev->features &
 		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+ 	if (!sysctl_tcp_use_sg)
+ 		sk->sk_route_caps &= ~NETIF_F_SG;
 
 	newtcp6sk = (struct tcp6_sock *)newsk;
 	newtcp6sk->pinet6 = &newtcp6sk->inet6;
@@ -1719,12 +1724,14 @@ do_time_wait:
 		goto discard_it;
 	}
 
+	spin_lock(&((struct tcp_tw_bucket *)sk)->tw_lock);
 	switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
 					  skb, th, skb->len)) {
 	case TCP_TW_SYN:
 	{
 		struct sock *sk2;
 
+		spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock);
 		sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
 		if (sk2 != NULL) {
 			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
@@ -1738,9 +1745,13 @@ do_time_wait:
 		tcp_v6_timewait_ack(sk, skb);
 		break;
 	case TCP_TW_RST:
+		spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock);
+		tcp_tw_put((struct tcp_tw_bucket *)sk);
 		goto no_tcp_socket;
 	case TCP_TW_SUCCESS:;
 	}
+	spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock);
+	tcp_tw_put((struct tcp_tw_bucket *)sk);
 	goto discard_it;
 }
 
@@ -1790,6 +1801,10 @@ static int tcp_v6_rebuild_header(struct 
 		ip6_dst_store(sk, dst, NULL);
 		sk->sk_route_caps = dst->dev->features &
 			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+		if (!sysctl_tcp_use_sg)
+			sk->sk_route_caps &= ~NETIF_F_SG;
+		if (!sysctl_tcp_use_sg)
+			sk->sk_route_caps &= ~NETIF_F_SG;
 		tcp_sk(sk)->ext2_header_len = dst->header_len;
 	}
 
diff -Nurap linux-2.6.9-100.orig/net/ipv6/udp.c linux-2.6.9-ve023stab054/net/ipv6/udp.c
--- linux-2.6.9-100.orig/net/ipv6/udp.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/ipv6/udp.c	2011-06-15 19:26:19.000000000 +0400
@@ -67,7 +67,9 @@ static int udp_v6_get_port(struct sock *
 {
 	struct sock *sk2;
 	struct hlist_node *node;
+	struct ve_struct *env;
 
+	env = VE_OWNER_SK(sk);
 	write_lock_bh(&udp_hash_lock);
 	if (!snum) {
 		int i, remaining;
@@ -87,7 +89,7 @@ static int udp_v6_get_port(struct sock *
 			int size = 0;
 			struct hlist_head *list;
 
-			list = &udp_hash[rover & (UDP_HTABLE_SIZE - 1)];
+			list = &udp_hash[udp_hashfn(rover, VEID(env))];
 			if (hlist_empty(list))
 				goto gotit;
 
@@ -105,7 +107,7 @@ static int udp_v6_get_port(struct sock *
 		/* 2nd pass: find hole in shortest hash chain */
 		rover = best;
 		for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
-			if (!udp_lport_inuse(rover))
+			if (!udp_lport_inuse(rover, env))
 				goto gotit;
 			rover += UDP_HTABLE_SIZE;
 			if (rover > high)
@@ -118,9 +120,10 @@ gotit:
 		udp_port_rover = snum = rover;
 	} else {
 		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
+			    &udp_hash[udp_hashfn(snum, VEID(env))]) {
 			if (inet_sk(sk2)->num == snum &&
 			    sk2 != sk &&
+			    ve_accessible_strict(VE_OWNER_SK(sk2), env) &&
 			    (!sk2->sk_bound_dev_if ||
 			     !sk->sk_bound_dev_if ||
 			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
@@ -132,7 +135,7 @@ gotit:
 
 	inet_sk(sk)->num = snum;
 	if (sk_unhashed(sk)) {
-		sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
+		sk_add_node(sk, &udp_hash[udp_hashfn(snum, VEID(env))]);
 		sock_prot_inc_use(sk->sk_prot);
 	}
 	write_unlock_bh(&udp_hash_lock);
diff -Nurap linux-2.6.9-100.orig/net/netlink/af_netlink.c linux-2.6.9-ve023stab054/net/netlink/af_netlink.c
--- linux-2.6.9-100.orig/net/netlink/af_netlink.c	2011-06-09 19:22:47.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/netlink/af_netlink.c	2011-06-15 19:26:20.000000000 +0400
@@ -53,6 +53,10 @@
 #include <linux/audit.h>
 #include <net/sock.h>
 #include <net/scm.h>
+#include <linux/swap.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_net.h>
 
 #define Nprintk(a...)
 
@@ -60,20 +64,6 @@
 #define NL_EMULATE_DEV
 #endif
 
-struct netlink_opt
-{
-	u32			pid;
-	unsigned int		groups;
-	u32			dst_pid;
-	unsigned int		dst_groups;
-	unsigned long		state;
-	int			(*handler)(int unit, struct sk_buff *skb);
-	wait_queue_head_t	wait;
-	struct netlink_callback	*cb;
-	spinlock_t		cb_lock;
-	void			(*data_ready)(struct sock *sk, int bytes);
-};
-
 #define nlk_sk(__sk) ((struct netlink_opt *)(__sk)->sk_protinfo)
 
 struct nl_pid_hash {
@@ -192,7 +182,10 @@ static __inline__ struct sock *netlink_l
 	read_lock(&nl_table_lock);
 	head = nl_pid_hashfn(hash, pid);
 	sk_for_each(sk, node, head) {
-		if (nlk_sk(sk)->pid == pid) {
+		/* VEs should find sockets, created by kernel */
+		if ((nlk_sk(sk)->pid == pid) &&
+			(!pid || ve_accessible_strict(VE_OWNER_SK(sk),
+				get_exec_env()))){
 			sock_hold(sk);
 			goto found;
 		}
@@ -292,7 +285,9 @@ static int netlink_insert(struct sock *s
 	head = nl_pid_hashfn(hash, pid);
 	len = 0;
 	sk_for_each(osk, node, head) {
-		if (nlk_sk(osk)->pid == pid)
+		if ((nlk_sk(osk)->pid == pid) &&
+			ve_accessible_strict(VE_OWNER_SK(osk),
+				get_exec_env()))
 			break;
 		len++;
 	}
@@ -347,15 +342,16 @@ static int netlink_create(struct socket 
 	sk = sk_alloc(PF_NETLINK, GFP_KERNEL, 1, NULL);
 	if (!sk)
 		return -ENOMEM;
+	if (ub_other_sock_charge(sk))
+		goto out_free;
 
 	sock_init_data(sock,sk);
 	sk_set_owner(sk, THIS_MODULE);
 
 	nlk = sk->sk_protinfo = kmalloc(sizeof(*nlk), GFP_KERNEL);
-	if (!nlk) {
-		sk_free(sk);
-		return -ENOMEM;
-	}
+	if (!nlk)
+		goto out_free;
+
 	memset(nlk, 0, sizeof(*nlk));
 
 	spin_lock_init(&nlk->cb_lock);
@@ -364,6 +360,10 @@ static int netlink_create(struct socket 
 
 	sk->sk_protocol = protocol;
 	return 0;
+
+out_free:
+	sk_free(sk);
+	return -ENOMEM;
 }
 
 static int netlink_release(struct socket *sock)
@@ -375,6 +375,7 @@ static int netlink_release(struct socket
 		return 0;
 
 	netlink_remove(sk);
+	sock_orphan(sk);
 	nlk = nlk_sk(sk);
 
 	spin_lock(&nlk->cb_lock);
@@ -382,14 +383,12 @@ static int netlink_release(struct socket
 		nlk->cb->done(nlk->cb);
 		netlink_destroy_callback(nlk->cb);
 		nlk->cb = NULL;
-		__sock_put(sk);
 	}
 	spin_unlock(&nlk->cb_lock);
 
 	/* OK. Socket is unlinked, and, therefore,
 	   no new packets will arrive */
 
-	sock_orphan(sk);
 	sock->sk = NULL;
 	wake_up_interruptible_all(&nlk->wait);
 
@@ -414,7 +413,7 @@ static int netlink_autobind(struct socke
 	struct hlist_head *head;
 	struct sock *osk;
 	struct hlist_node *node;
-	s32 pid = current->pid;
+	s32 pid = virt_pid(current);
 	int err;
 	static s32 rover = -4097;
 
@@ -423,7 +422,9 @@ retry:
 	netlink_table_grab();
 	head = nl_pid_hashfn(hash, pid);
 	sk_for_each(osk, node, head) {
-		if (nlk_sk(osk)->pid == pid) {
+		if ((nlk_sk(osk)->pid == pid) &&
+			ve_accessible_strict(VE_OWNER_SK(osk),
+				get_exec_env())){
 			/* Bind collision, search negative pid values. */
 			pid = rover--;
 			if (rover > -4097)
@@ -449,7 +450,7 @@ retry:
 static inline int netlink_capable(struct socket *sock, unsigned int flag) 
 { 
 	return (nl_nonroot[sock->sk->sk_protocol] & flag) ||
-	       capable(CAP_NET_ADMIN);
+	       capable(CAP_VE_NET_ADMIN);
 } 
 
 static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
@@ -600,7 +601,8 @@ struct sock *netlink_getsockbyfilp(struc
  * 0: continue
  * 1: repeat lookup - reference dropped while waiting for socket memory.
  */
-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo)
+int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
+		      long timeo, struct sock *ssk)
 {
 	struct netlink_opt *nlk;
 
@@ -614,7 +616,7 @@ int netlink_attachskb(struct sock *sk, s
 	    test_bit(0, &nlk->state)) {
 		DECLARE_WAITQUEUE(wait, current);
 		if (!timeo) {
-			if (!nlk->pid)
+			if (!ssk || nlk_sk(ssk)->pid == 0)
 				netlink_overrun(sk);
 			sock_put(sk);
 			kfree_skb(skb);
@@ -657,6 +659,11 @@ int netlink_sendskb(struct sock *sk, str
 		return len;
 	}
 #endif
+	if (ub_sockrcvbuf_charge(sk, skb) < 0) {
+		sock_put(sk);
+		kfree_skb(skb);
+		return -EACCES;
+	}
 
 	skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk, len);
@@ -709,7 +716,7 @@ retry:
 		kfree_skb(skb);
 		return PTR_ERR(sk);
 	}
-	err = netlink_attachskb(sk, skb, nonblock, timeo);
+	err = netlink_attachskb(sk, skb, nonblock, timeo, ssk);
 	if (err == 1)
 		goto retry;
 	if (err)
@@ -729,11 +736,15 @@ static __inline__ int netlink_broadcast_
 #endif
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
 	    !test_bit(0, &nlk->state)) {
+		if (ub_sockrcvbuf_charge(sk, skb))
+			goto out;
 		skb_set_owner_r(skb, sk);
 		skb_queue_tail(&sk->sk_receive_queue, skb);
 		sk->sk_data_ready(sk, skb->len);
+		/* do not uncharge as >= 0 is treated as success */
 		return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
 	}
+out:
 	return -1;
 }
 
@@ -760,6 +771,9 @@ static inline int do_one_broadcast(struc
 	if (nlk->pid == p->pid || !(nlk->groups & p->group))
 		goto out;
 
+	if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk)))
+		goto out;
+
 	if (p->failure) {
 		netlink_overrun(sk);
 		goto out;
@@ -856,6 +870,9 @@ static inline int do_one_set_err(struct 
 	if (nlk->pid == p->pid || !(nlk->groups & p->group))
 		goto out;
 
+	if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk)))
+		goto out;
+
 	sk->sk_err = p->code;
 	sk->sk_error_report(sk);
 out:
@@ -897,12 +914,17 @@ static int netlink_sendmsg(struct kiocb 
 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
 	struct sock *sk = sock->sk;
 	struct netlink_opt *nlk = nlk_sk(sk);
-	struct sockaddr_nl *addr=msg->msg_name;
+	struct sockaddr_nl *addr = msg->msg_name;
 	u32 dst_pid;
-	u32 dst_groups;
 	struct sk_buff *skb;
 	int err;
 	struct scm_cookie scm;
+	struct sock *dstsk;
+	long timeo;
+	int no_ubc, no_buf;
+	unsigned long chargesize;
+
+	DECLARE_WAITQUEUE(wait, current);
 
 	if (msg->msg_flags&MSG_OOB)
 		return -EOPNOTSUPP;
@@ -913,17 +935,16 @@ static int netlink_sendmsg(struct kiocb 
 	if (err < 0)
 		return err;
 
+	/* Broadcasts are disabled as it was in 2.4 with UBC. According to
+	 * ANK this is OK. Den */
 	if (msg->msg_namelen) {
 		if (addr->nl_family != AF_NETLINK)
 			return -EINVAL;
 		dst_pid = addr->nl_pid;
-		dst_groups = addr->nl_groups;
-		if (dst_groups && !netlink_capable(sock, NL_NONROOT_SEND))
+		if (addr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
 			return -EPERM;
-	} else {
+	} else
 		dst_pid = nlk->dst_pid;
-		dst_groups = nlk->dst_groups;
-	}
 
 	if (!nlk->pid) {
 		err = netlink_autobind(sock);
@@ -936,13 +957,13 @@ static int netlink_sendmsg(struct kiocb 
 		goto out;
 	err = -ENOBUFS;
 	skb = alloc_skb(len, GFP_KERNEL);
-	if (skb==NULL)
+	if (skb == NULL)
 		goto out;
 
 	NETLINK_CB(skb).pid	= nlk->pid;
 	NETLINK_CB(skb).groups	= nlk->groups;
 	NETLINK_CB(skb).dst_pid = dst_pid;
-	NETLINK_CB(skb).dst_groups = dst_groups;
+	NETLINK_CB(skb).dst_groups = 0;
 	NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
 	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
 
@@ -953,25 +974,88 @@ static int netlink_sendmsg(struct kiocb 
 	 */
 
 	err = -EFAULT;
-	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
-		kfree_skb(skb);
-		goto out;
-	}
+	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len))
+		goto out_free;
 
 	err = security_netlink_send(sk, skb);
-	if (err) {
-		kfree_skb(skb);
-		goto out;
+	if (err)
+		goto out_free;
+
+	timeo = sock_sndtimeo(sk, msg->msg_flags&MSG_DONTWAIT);
+retry:
+	dstsk = netlink_getsockbypid(sk, dst_pid);
+	if (IS_ERR(dstsk)) {
+		err = PTR_ERR(dstsk);
+		goto out_free;
 	}
 
-	if (dst_groups) {
-		atomic_inc(&skb->users);
-		netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL);
+	nlk = nlk_sk(dstsk);
+#ifdef NL_EMULATE_DEV
+	if (nlk->handler) {
+		skb_orphan(skb);
+		err = nlk->handler(dstsk->sk_protocol, skb);
+		goto out_put;
 	}
-	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
+#endif
+
+	/* BTW, it could be done once, before the retry loop */
+	chargesize = skb_charge_fullsize(skb);
+	no_ubc = ub_sock_getwres_other(sk, chargesize);
+	no_buf = atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
+		test_bit(0, &nlk->state);
+	if (no_ubc || no_buf) {
+		wait_queue_head_t *sleep;
+
+		if (!no_ubc)
+			ub_sock_retwres_other(sk, chargesize,
+					      SOCK_MIN_UBCSPACE_CH);
+		err = -EAGAIN;
+		if (timeo == 0) {
+			kfree_skb(skb);
+			goto out_put;
+		}
+
+		/* wake up comes to different queues */
+		sleep = no_ubc ? sk->sk_sleep : &nlk->wait;
+		__set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(sleep, &wait);
+
+		/* this if can't be moved upper because ub_sock_snd_queue_add()
+		 * may change task state to TASK_RUNNING */
+		if (no_ubc)
+			ub_sock_sndqueueadd_other(sk, chargesize);
+
+		if ((atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
+		     test_bit(0, &nlk->state) || no_ubc) &&
+		    !sock_flag(dstsk, SOCK_DEAD))
+			timeo = schedule_timeout(timeo);
 
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(sleep, &wait);
+		if (no_ubc)
+			ub_sock_sndqueuedel(sk);
+		sock_put(dstsk);
+
+		if (!signal_pending(current))
+			goto retry;
+		err = sock_intr_errno(timeo);
+		goto out_free;
+	}
+
+	skb_orphan(skb);
+	skb_set_owner_r(skb, dstsk);
+	ub_skb_set_charge(skb, sk, chargesize, UB_OTHERSOCKBUF);
+	skb_queue_tail(&dstsk->sk_receive_queue, skb);
+	dstsk->sk_data_ready(dstsk, len);
+	err = len;
+out_put:
+	sock_put(dstsk);
 out:
 	return err;
+
+out_free:
+	kfree_skb(skb);
+	return err;
 }
 
 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
@@ -1108,6 +1192,10 @@ static int netlink_dump(struct sock *sk)
 	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
+	if (ub_nlrcvbuf_charge(skb, sk)	< 0) {
+		kfree_skb(skb);
+		return -EACCES;
+	}
 
 	spin_lock(&nlk->cb_lock);
 
@@ -1138,7 +1226,6 @@ static int netlink_dump(struct sock *sk)
 	spin_unlock(&nlk->cb_lock);
 
 	netlink_destroy_callback(cb);
-	sock_put(sk);
 	return 0;
 }
 
@@ -1168,9 +1255,9 @@ int netlink_dump_start(struct sock *ssk,
 		return -ECONNREFUSED;
 	}
 	nlk = nlk_sk(sk);
-	/* A dump is in progress... */
+	/* A dump or destruction is in progress... */
 	spin_lock(&nlk->cb_lock);
-	if (nlk->cb) {
+	if (nlk->cb || sock_flag(sk, SOCK_DEAD)) {
 		spin_unlock(&nlk->cb_lock);
 		netlink_destroy_callback(cb);
 		sock_put(sk);
@@ -1180,6 +1267,7 @@ int netlink_dump_start(struct sock *ssk,
 	spin_unlock(&nlk->cb_lock);
 
 	netlink_dump(sk);
+	sock_put(sk);
 	return 0;
 }
 
@@ -1476,10 +1564,10 @@ enomem:
 
 	memset(nl_table, 0, sizeof(*nl_table) * MAX_LINKS);
 
-	if (num_physpages >= (128 * 1024))
-		max = num_physpages >> (21 - PAGE_SHIFT);
+	if (nr_lowmem_pages() >= (128 * 1024))
+		max = nr_lowmem_pages() >> (21 - PAGE_SHIFT);
 	else
-		max = num_physpages >> (23 - PAGE_SHIFT);
+		max = nr_lowmem_pages() >> (23 - PAGE_SHIFT);
 
 	order = get_bitmask_order(max) - 1 + PAGE_SHIFT;
 	max = (1UL << order) / sizeof(struct hlist_head);
@@ -1505,6 +1593,7 @@ enomem:
 
 	sock_register(&netlink_family_ops);
 #ifdef CONFIG_PROC_FS
+	/* FIXME: virtualize before give access from VEs */
 	proc_net_fops_create("netlink", 0, &netlink_seq_fops);
 #endif
 	/* The netlink device handler may be needed early. */ 
diff -Nurap linux-2.6.9-100.orig/net/packet/af_packet.c linux-2.6.9-ve023stab054/net/packet/af_packet.c
--- linux-2.6.9-100.orig/net/packet/af_packet.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/packet/af_packet.c	2011-06-15 19:26:19.000000000 +0400
@@ -73,6 +73,8 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
+#include <ub/ub_net.h>
+
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
 #endif
@@ -262,7 +264,8 @@ static int packet_rcv_spkt(struct sk_buf
 	 *	so that this procedure is noop.
 	 */
 
-	if (skb->pkt_type == PACKET_LOOPBACK)
+	if (skb->pkt_type == PACKET_LOOPBACK ||
+	    !ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
 		goto out;
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
@@ -454,6 +457,9 @@ static int packet_rcv(struct sk_buff *sk
 	sk = pt->af_packet_priv;
 	po = pkt_sk(sk);
 
+	if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
+		goto drop;
+
 	skb->dev = dev;
 
 	if (dev->hard_header) {
@@ -513,6 +519,9 @@ static int packet_rcv(struct sk_buff *sk
 	if (pskb_trim(skb, snaplen))
 		goto drop_n_acct;
 
+	if (ub_sockrcvbuf_charge(sk, skb))
+		goto drop_n_acct;
+
 	skb_set_owner_r(skb, sk);
 	skb->dev = NULL;
 	dst_release(skb->dst);
@@ -563,6 +572,9 @@ static int tpacket_rcv(struct sk_buff *s
 	sk = pt->af_packet_priv;
 	po = pkt_sk(sk);
 
+	if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
+		goto drop;
+
 	if (dev->hard_header) {
 		if (sk->sk_type != SOCK_DGRAM)
 			skb_push(skb, skb->data - skb->mac.raw);
@@ -612,6 +624,12 @@ static int tpacket_rcv(struct sk_buff *s
 	if (snaplen > skb->len-skb->data_len)
 		snaplen = skb->len-skb->data_len;
 
+	if (copy_skb &&
+	    ub_sockrcvbuf_charge(sk, copy_skb)) {
+		spin_lock(&sk->sk_receive_queue.lock);
+		goto ring_is_full;
+	}
+
 	spin_lock(&sk->sk_receive_queue.lock);
 	h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
 	
@@ -985,6 +1003,8 @@ static int packet_create(struct socket *
 	sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1, NULL);
 	if (sk == NULL)
 		goto out;
+	if (ub_other_sock_charge(sk))
+		goto out_free;
 
 	sock->ops = &packet_ops;
 #ifdef CONFIG_SOCK_PACKET
@@ -1405,11 +1425,16 @@ static int packet_notifier(struct notifi
 	struct sock *sk;
 	struct hlist_node *node;
 	struct net_device *dev = (struct net_device*)data;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	read_lock(&packet_sklist_lock);
 	sk_for_each(sk, node, &packet_sklist) {
 		struct packet_opt *po = pkt_sk(sk);
 
+		if (!ve_accessible_strict(VE_OWNER_SK(sk), ve))
+			continue;
+
 		switch (msg) {
 		case NETDEV_UNREGISTER:
 #ifdef CONFIG_PACKET_MULTICAST
@@ -1813,6 +1838,8 @@ static inline struct sock *packet_seq_id
 	struct hlist_node *node;
 
 	sk_for_each(s, node, &packet_sklist) {
+		if (!ve_accessible(VE_OWNER_SK(s), get_exec_env()))
+			continue;
 		if (!off--)
 			return s;
 	}
@@ -1828,9 +1855,13 @@ static void *packet_seq_start(struct seq
 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	++*pos;
-	return  (v == SEQ_START_TOKEN) 
-		? sk_head(&packet_sklist) 
-		: sk_next((struct sock*)v) ;
+	do {
+		v = (v == SEQ_START_TOKEN) 
+			? sk_head(&packet_sklist) 
+			: sk_next((struct sock*)v);
+	} while (v != NULL &&
+		!ve_accessible(VE_OWNER_SK((struct sock*)v), get_exec_env()));	
+	return v;
 }
 
 static void packet_seq_stop(struct seq_file *seq, void *v)
diff -Nurap linux-2.6.9-100.orig/net/rose/rose_route.c linux-2.6.9-ve023stab054/net/rose/rose_route.c
--- linux-2.6.9-100.orig/net/rose/rose_route.c	2004-10-19 01:55:28.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/rose/rose_route.c	2011-06-15 19:26:18.000000000 +0400
@@ -727,7 +727,8 @@ int rose_rt_ioctl(unsigned int cmd, void
 		}
 		if (rose_route.mask > 10) /* Mask can't be more than 10 digits */
 			return -EINVAL;
-
+		if (rose_route.ndigis > 8) /* No more than 8 digipeats */
+			return -EINVAL;
 		err = rose_add_node(&rose_route, dev);
 		dev_put(dev);
 		return err;
diff -Nurap linux-2.6.9-100.orig/net/sched/sch_api.c linux-2.6.9-ve023stab054/net/sched/sch_api.c
--- linux-2.6.9-100.orig/net/sched/sch_api.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/sched/sch_api.c	2011-06-15 19:26:19.000000000 +0400
@@ -196,10 +196,14 @@ struct Qdisc *qdisc_lookup(struct net_de
 {
 	struct Qdisc *q;
 
+	read_lock_bh(&qdisc_tree_lock);
 	list_for_each_entry(q, &dev->qdisc_list, list) {
-		if (q->handle == handle)
+		if (q->handle == handle) {
+			read_unlock_bh(&qdisc_tree_lock);
 			return q;
+		}
 	}
+	read_unlock_bh(&qdisc_tree_lock);
 	return NULL;
 }
 
@@ -1195,7 +1199,7 @@ static int __init pktsched_init(void)
 
 	register_qdisc(&pfifo_qdisc_ops);
 	register_qdisc(&bfifo_qdisc_ops);
-	proc_net_fops_create("psched", 0, &psched_fops);
+	__proc_net_fops_create("net/psched", 0, &psched_fops, NULL);
 
 	return 0;
 }
diff -Nurap linux-2.6.9-100.orig/net/sched/sch_cbq.c linux-2.6.9-ve023stab054/net/sched/sch_cbq.c
--- linux-2.6.9-100.orig/net/sched/sch_cbq.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/sched/sch_cbq.c	2011-06-15 19:26:19.000000000 +0400
@@ -956,8 +956,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int 
 
 			if (cl->deficit <= 0) {
 				q->active[prio] = cl;
-				cl = cl->next_alive;
 				cl->deficit += cl->quantum;
+				cl = cl->next_alive;
 			}
 			return skb;
 
@@ -1133,17 +1133,19 @@ static void cbq_normalize_quanta(struct 
 
 	for (h=0; h<16; h++) {
 		for (cl = q->classes[h]; cl; cl = cl->next) {
+			long mtu;
 			/* BUGGGG... Beware! This expression suffer of
 			   arithmetic overflows!
 			 */
 			if (cl->priority == prio) {
-				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
-					q->quanta[prio];
-			}
-			if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
-				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
-				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
+				cl->quantum = (cl->weight * cl->allot) /
+					(q->quanta[prio] / q->nclasses[prio]);
 			}
+			mtu = cl->qdisc->dev->mtu;
+			if (cl->quantum <= mtu/2)
+				cl->quantum = mtu/2 + 1;
+			else if (cl->quantum > 32*mtu) 
+				cl->quantum = 32*mtu;
 		}
 	}
 }
@@ -1752,6 +1754,8 @@ static void cbq_destroy_class(struct Qdi
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 
+	BUG_TRAP(!cl->filters);
+
 	cbq_destroy_filters(cl);
 	qdisc_destroy(cl->q);
 	qdisc_put_rtab(cl->R_tab);
@@ -1772,6 +1776,14 @@ cbq_destroy(struct Qdisc* sch)
 #ifdef CONFIG_NET_CLS_POLICE
 	q->rx_class = NULL;
 #endif
+	/*
+	 * Filters must be destroyed first because we don't destroy the
+	 * classes from root to leafs which means that filters can still
+	 * be bound to classes which have been destroyed already. --TGR '04
+	 */
+	for (h = 0; h < 16; h++)
+		for (cl = q->classes[h]; cl; cl = cl->next)
+			cbq_destroy_filters(cl);
 
 	for (h = 0; h < 16; h++) {
 		struct cbq_class *next;
diff -Nurap linux-2.6.9-100.orig/net/sched/sch_generic.c linux-2.6.9-ve023stab054/net/sched/sch_generic.c
--- linux-2.6.9-100.orig/net/sched/sch_generic.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/sched/sch_generic.c	2011-06-15 19:26:19.000000000 +0400
@@ -97,6 +97,9 @@ int qdisc_restart(struct net_device *dev
 
 	/* Dequeue packet */
 	if ((skb = q->dequeue(q)) != NULL) {
+		struct ve_struct *envid;
+
+		envid = set_exec_env(VE_OWNER_SKB(skb));
 		unsigned nolock = (dev->features & NETIF_F_LLTX);
 		/*
 		 * When the driver has LLTX set it does its own locking
@@ -121,6 +124,7 @@ int qdisc_restart(struct net_device *dev
 					kfree_skb(skb);
 					if (net_ratelimit())
 						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+					(void)set_exec_env(envid);
 					return -1;
 				}
 				__get_cpu_var(netdev_rx_stat).cpu_collision++;
@@ -146,6 +150,7 @@ int qdisc_restart(struct net_device *dev
 						spin_unlock(&dev->xmit_lock);
 					}
 					spin_lock(&dev->queue_lock);
+					(void)set_exec_env(envid);
 					return -1;
 				}
 				if (ret == NETDEV_TX_LOCKED && nolock) {
@@ -177,6 +182,7 @@ int qdisc_restart(struct net_device *dev
 requeue:
 		q->ops->requeue(skb, q);
 		netif_schedule(dev);
+		(void)set_exec_env(envid);
 		return 1;
 	}
 	return q->q.qlen;
@@ -588,3 +594,4 @@ EXPORT_SYMBOL(qdisc_reset);
 EXPORT_SYMBOL(qdisc_restart);
 EXPORT_SYMBOL(qdisc_lock_tree);
 EXPORT_SYMBOL(qdisc_unlock_tree);
+EXPORT_SYMBOL(dev_shutdown);
diff -Nurap linux-2.6.9-100.orig/net/sched/sch_teql.c linux-2.6.9-ve023stab054/net/sched/sch_teql.c
--- linux-2.6.9-100.orig/net/sched/sch_teql.c	2004-10-19 01:53:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/sched/sch_teql.c	2011-06-15 19:26:19.000000000 +0400
@@ -187,6 +187,9 @@ static int teql_qdisc_init(struct Qdisc 
 	struct teql_master *m = (struct teql_master*)sch->ops;
 	struct teql_sched_data *q = qdisc_priv(sch);
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (dev->hard_header_len > m->dev->hard_header_len)
 		return -EINVAL;
 
diff -Nurap linux-2.6.9-100.orig/net/socket.c linux-2.6.9-ve023stab054/net/socket.c
--- linux-2.6.9-100.orig/net/socket.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/socket.c	2011-06-15 19:26:22.000000000 +0400
@@ -83,6 +83,7 @@
 #include <linux/compat.h>
 #include <linux/kmod.h>
 #include <linux/audit.h>
+#include <linux/in.h>
 
 #ifdef CONFIG_NET_RADIO
 #include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
@@ -520,6 +521,9 @@ struct file_operations bad_sock_fops = {
  
 void sock_release(struct socket *sock)
 {
+	if (sock->sk)
+		ub_sock_sndqueuedel(sock->sk);
+
 	if (sock->ops) {
 		struct module *owner = sock->ops->owner;
 
@@ -1089,6 +1093,37 @@ int sock_wake_async(struct socket *sock,
 	return 0;
 }
 
+int vz_security_proto_check(int family, int type, int protocol)
+{
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()))
+		return 0;
+
+	switch (family) {
+	case PF_UNSPEC:
+	case PF_PACKET:
+	case PF_NETLINK:
+	case PF_UNIX:
+		break;
+	case PF_INET:
+		switch (protocol) {
+		case  IPPROTO_IP:
+		case  IPPROTO_ICMP:
+		case  IPPROTO_TCP:
+		case  IPPROTO_UDP:
+		case  IPPROTO_RAW:
+			break;
+		default:
+			return -EAFNOSUPPORT;
+		}
+		break;
+	default:
+		return -EAFNOSUPPORT;
+	}
+#endif
+	return 0;
+}
+
 static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
 {
 	int i;
@@ -1117,6 +1152,11 @@ static int __sock_create(int family, int
 		family = PF_PACKET;
 	}
 
+	/* VZ compatibility layer */
+	err = vz_security_proto_check(family, type, protocol);
+	if (err < 0)
+		return err;
+
 	err = security_socket_create(family, type, protocol, kern);
 	if (err)
 		return err;
diff -Nurap linux-2.6.9-100.orig/net/sunrpc/clnt.c linux-2.6.9-ve023stab054/net/sunrpc/clnt.c
--- linux-2.6.9-100.orig/net/sunrpc/clnt.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/sunrpc/clnt.c	2011-06-15 19:26:19.000000000 +0400
@@ -165,10 +165,10 @@ rpc_create_client(struct rpc_xprt *xprt,
 	}
 
 	/* save the nodename */
-	clnt->cl_nodelen = strlen(system_utsname.nodename);
+	clnt->cl_nodelen = strlen(ve_utsname.nodename);
 	if (clnt->cl_nodelen > UNX_MAXNODENAME)
 		clnt->cl_nodelen = UNX_MAXNODENAME;
-	memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
+	memcpy(clnt->cl_nodename, ve_utsname.nodename, clnt->cl_nodelen);
 	return clnt;
 
 out_no_auth:
diff -Nurap linux-2.6.9-100.orig/net/sunrpc/rpc_pipe.c linux-2.6.9-ve023stab054/net/sunrpc/rpc_pipe.c
--- linux-2.6.9-100.orig/net/sunrpc/rpc_pipe.c	2004-10-19 01:53:46.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/sunrpc/rpc_pipe.c	2011-06-15 19:26:22.000000000 +0400
@@ -806,6 +806,14 @@ static struct file_system_type rpc_pipe_
 	.kill_sb	= kill_litter_super,
 };
 
+void change_rpc_virt(int ve_allow_rpc)
+{
+	if (ve_allow_rpc)
+		rpc_pipe_fs_type.fs_flags |= FS_VIRTUALIZED;
+	else
+		rpc_pipe_fs_type.fs_flags &= ~FS_VIRTUALIZED;
+}
+
 static void
 init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 {
diff -Nurap linux-2.6.9-100.orig/net/sunrpc/sched.c linux-2.6.9-ve023stab054/net/sunrpc/sched.c
--- linux-2.6.9-100.orig/net/sunrpc/sched.c	2011-06-09 19:23:06.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/sunrpc/sched.c	2011-06-15 19:26:18.000000000 +0400
@@ -1133,9 +1133,9 @@ rpciod(void *ptr)
 			spin_lock_bh(&rpc_queue_lock);
 		}
 		__rpc_schedule();
-		if (current->flags & PF_FREEZE) {
+		if (test_thread_flag(TIF_FREEZE)) {
 			spin_unlock_bh(&rpc_queue_lock);
-			refrigerator(PF_FREEZE);
+			refrigerator();
 			spin_lock_bh(&rpc_queue_lock);
 		}
 
diff -Nurap linux-2.6.9-100.orig/net/sunrpc/svcsock.c linux-2.6.9-ve023stab054/net/sunrpc/svcsock.c
--- linux-2.6.9-100.orig/net/sunrpc/svcsock.c	2011-06-09 19:22:56.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/sunrpc/svcsock.c	2011-06-15 19:26:20.000000000 +0400
@@ -362,6 +362,9 @@ svc_sendto(struct svc_rqst *rqstp, struc
 	size_t		base = xdr->page_base;
 	unsigned int	pglen = xdr->page_len;
 	unsigned int	flags = MSG_MORE;
+	struct ve_struct *old_env;
+
+	old_env = set_exec_env(get_ve0());
 
 	slen = xdr->len;
 
@@ -426,6 +429,8 @@ out:
 			rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len,
 		rqstp->rq_addr.sin_addr.s_addr);
 
+	(void)set_exec_env(old_env);
+
 	return len;
 }
 
@@ -438,9 +443,12 @@ svc_recv_available(struct svc_sock *svsk
 	mm_segment_t	oldfs;
 	struct socket	*sock = svsk->sk_sock;
 	int		avail, err;
+	struct ve_struct *old_env;
 
 	oldfs = get_fs(); set_fs(KERNEL_DS);
+	old_env = set_exec_env(get_ve0());
 	err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
+	(void)set_exec_env(old_env);
 	set_fs(oldfs);
 
 	return (err >= 0)? avail : err;
@@ -455,6 +463,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str
 	struct msghdr	msg;
 	struct socket	*sock;
 	int		len, alen;
+	struct ve_struct *old_env;
 
 	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
 	sock = rqstp->rq_sock->sk_sock;
@@ -466,7 +475,9 @@ svc_recvfrom(struct svc_rqst *rqstp, str
 
 	msg.msg_flags	= MSG_DONTWAIT;
 
+	old_env = set_exec_env(get_ve0());
 	len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT);
+	(void)set_exec_env(old_env);
 
 	/* sock_recvmsg doesn't fill in the name/namelen, so we must..
 	 * possibly we should cache this in the svc_sock structure
@@ -770,17 +781,19 @@ svc_tcp_accept(struct svc_sock *svsk)
 	struct proto_ops *ops;
 	struct svc_sock	*newsvsk;
 	int		err, slen;
+	struct ve_struct *old_env;
 
 	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
 	if (!sock)
 		return;
 
+	old_env = set_exec_env(get_ve0());
 	err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock);
 	if (err) {
 		if (err == -ENOMEM)
 			printk(KERN_WARNING "%s: no more sockets!\n",
 			       serv->sv_name);
-		return;
+		goto restore;
 	}
 
 	dprintk("svc: tcp_accept %p allocated\n", newsock);
@@ -874,6 +887,8 @@ svc_tcp_accept(struct svc_sock *svsk)
 
 	}
 
+	(void)set_exec_env(old_env);
+
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpconn++;
 
@@ -881,6 +896,8 @@ svc_tcp_accept(struct svc_sock *svsk)
 
 failed:
 	sock_release(newsock);
+restore:
+	(void)set_exec_env(old_env);
 	return;
 }
 
@@ -1229,8 +1246,8 @@ svc_recv(struct svc_serv *serv, struct s
 
 		schedule_timeout(timeout);
 
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		if (test_thread_flag(TIF_FREEZE))
+			refrigerator();
 
 		spin_lock_bh(&serv->sv_lock);
 		remove_wait_queue(&rqstp->rq_wait, &wait);
@@ -1399,6 +1416,7 @@ svc_create_socket(struct svc_serv *serv,
 	struct socket	*sock;
 	int		error;
 	int		type;
+	struct ve_struct *old_env;
 
 	dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n",
 				serv->sv_program->pg_name, protocol,
@@ -1412,8 +1430,10 @@ svc_create_socket(struct svc_serv *serv,
 	}
 	type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
 
+	old_env = set_exec_env(get_ve0());
+
 	if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
-		return error;
+		goto restore;
 
 	if (sin != NULL) {
 		if (type == SOCK_STREAM)
@@ -1429,12 +1449,16 @@ svc_create_socket(struct svc_serv *serv,
 			goto bummer;
 	}
 
-	if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
+	if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) {
+		(void)set_exec_env(old_env);
 		return 0;
+	}
 
 bummer:
 	dprintk("svc: svc_create_socket error = %d\n", -error);
 	sock_release(sock);
+restore:
+	(void)set_exec_env(old_env);
 	return error;
 }
 
@@ -1452,6 +1476,8 @@ svc_delete_socket(struct svc_sock *svsk)
 	serv = svsk->sk_server;
 	sk = svsk->sk_sk;
 
+	/* XXX: serialization? */
+	sk->sk_user_data = NULL;
 	sk->sk_state_change = svsk->sk_ostate;
 	sk->sk_data_ready = svsk->sk_odata;
 	sk->sk_write_space = svsk->sk_owspace;
diff -Nurap linux-2.6.9-100.orig/net/sunrpc/sysctl.c linux-2.6.9-ve023stab054/net/sunrpc/sysctl.c
--- linux-2.6.9-100.orig/net/sunrpc/sysctl.c	2011-06-09 19:22:40.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/sunrpc/sysctl.c	2011-06-15 19:26:22.000000000 +0400
@@ -36,6 +36,7 @@ unsigned int	xprt_max_resvport = 1023;
 
 #ifdef RPC_DEBUG
 
+static int ve_allow_rpc;
 static struct ctl_table_header *sunrpc_table_header;
 static ctl_table		sunrpc_table[];
 
@@ -61,6 +62,23 @@ rpc_unregister_sysctl(void)
 	}
 }
 
+static int sysctl_ve_allow_rpc(ctl_table *ctl, int write, struct file * filp,
+	void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	extern void change_rpc_virt(int);
+	int val;
+	int ret;
+
+	val = ve_allow_rpc;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	if (write && ve_allow_rpc != val)
+		change_rpc_virt(ve_allow_rpc);
+
+	return ret;
+}
+
 static int
 proc_dodebug(ctl_table *table, int write, struct file *file,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -207,6 +225,14 @@ static ctl_table debug_table[] = {
 		.extra1		= &min_slot_table_size,
 		.extra2		= &max_slot_table_size
 	},
+	{
+		.ctl_name	= CTL_VE_ALLOW_RPC,
+		.procname	= "ve_allow_rpc",
+		.data		= &ve_allow_rpc,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &sysctl_ve_allow_rpc,
+	},
 	{ .ctl_name = 0 }
 };
 
diff -Nurap linux-2.6.9-100.orig/net/sunrpc/xprt.c linux-2.6.9-ve023stab054/net/sunrpc/xprt.c
--- linux-2.6.9-100.orig/net/sunrpc/xprt.c	2011-06-09 19:22:57.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/sunrpc/xprt.c	2011-06-15 19:26:19.000000000 +0400
@@ -245,6 +245,7 @@ xprt_sendmsg(struct rpc_xprt *xprt, stru
 	int addrlen = 0;
 	unsigned int	skip;
 	int		result;
+	struct ve_struct *old_env;
 
 	if (!sock)
 		return -ENOTCONN;
@@ -262,7 +263,9 @@ xprt_sendmsg(struct rpc_xprt *xprt, stru
 	skip = req->rq_bytes_sent;
 
 	clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+	old_env = set_exec_env(get_ve0());
 	result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT);
+	(void)set_exec_env(old_env);
 
 	dprintk("RPC:      xprt_sendmsg(%d) = %d\n", xdr->len - skip, result);
 
@@ -483,6 +486,7 @@ static void xprt_socket_connect(void *ar
 	struct rpc_xprt *xprt = (struct rpc_xprt *)args;
 	struct socket *sock = xprt->sock;
 	int status = -EIO;
+	struct ve_struct *old_env;
 
 	if (xprt->shutdown || xprt->addr.sin_port == 0)
 		goto out;
@@ -507,8 +511,10 @@ static void xprt_socket_connect(void *ar
 	/*
 	 * Tell the socket layer to start connecting...
 	 */
+	old_env = set_exec_env(get_ve0());
 	status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
 			sizeof(xprt->addr), O_NONBLOCK);
+	(void)set_exec_env(old_env);
 	dprintk("RPC: %p  connect status %d connected %d sock state %d\n",
 			xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
 	if (status < 0) {
@@ -1532,13 +1538,16 @@ static inline int xprt_bindresvport(stru
 		.sin_family = AF_INET,
 	};
 	int		err, port;
+	struct ve_struct *old_env;
 
 	/* Were we already bound to a given port? Try to reuse it */
 	port = xprt->port;
 	do {
 		myaddr.sin_port = htons(port);
+		old_env = set_exec_env(get_ve0());
 		err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
 						sizeof(myaddr));
+		(void)set_exec_env(old_env);
 		if (err == 0) {
 			xprt->port = port;
 			return 0;
@@ -1615,15 +1624,18 @@ static struct socket * xprt_create_socke
 {
 	struct socket	*sock;
 	int		type, err;
+	struct ve_struct *old_env;
 
 	dprintk("RPC:      xprt_create_socket(%s %d)\n",
 			   (proto == IPPROTO_UDP)? "udp" : "tcp", proto);
 
 	type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
 
+	old_env = set_exec_env(get_ve0());
+
 	if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) {
 		printk("RPC: can't create socket (%d).\n", -err);
-		return NULL;
+		goto out;
 	}
 
 	/* If the caller has the capability, bind to a reserved port */
@@ -1632,10 +1644,13 @@ static struct socket * xprt_create_socke
 		goto failed;
 	}
 
+	(void)set_exec_env(old_env);
 	return sock;
 
 failed:
 	sock_release(sock);
+out:
+	(void)set_exec_env(old_env);
 	return NULL;
 }
 
diff -Nurap linux-2.6.9-100.orig/net/tux/directory.c linux-2.6.9-ve023stab054/net/tux/directory.c
--- linux-2.6.9-100.orig/net/tux/directory.c	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/tux/directory.c	2011-06-15 19:26:19.000000000 +0400
@@ -163,11 +163,11 @@ static void do_dir_line (tux_req_t *req,
 		if (!(mode & tux_mode_allowed))
 			goto out_dput;
 
-		err = permission(inode, MAY_READ, NULL);
+		err = permission(inode, MAY_READ, NULL, NULL);
 		if (err)
 			goto out_dput;
 		if (dirp->d_type == DT_DIR) {
-			err = permission(inode, MAY_EXEC, NULL);
+			err = permission(inode, MAY_EXEC, NULL, NULL);
 			if (err)
 				goto out_dput;
 		}
diff -Nurap linux-2.6.9-100.orig/net/tux/input.c linux-2.6.9-ve023stab054/net/tux/input.c
--- linux-2.6.9-100.orig/net/tux/input.c	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/tux/input.c	2011-06-15 19:26:19.000000000 +0400
@@ -106,7 +106,7 @@ int tux_permission (struct inode *inode)
 	 */
 	if (!(mode & tux_mode_allowed))
 		return -3;
-	err = permission(inode,MAY_READ,NULL);
+	err = permission(inode,MAY_READ,NULL,NULL);
 	return err;
 }
 
diff -Nurap linux-2.6.9-100.orig/net/tux/proto_ftp.c linux-2.6.9-ve023stab054/net/tux/proto_ftp.c
--- linux-2.6.9-100.orig/net/tux/proto_ftp.c	2011-06-09 19:22:39.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/tux/proto_ftp.c	2011-06-15 19:26:19.000000000 +0400
@@ -762,7 +762,7 @@ void ftp_get_mdtm (tux_req_t *req, int c
 		}
 		goto out_err;
 	}
-	err = permission(dentry->d_inode, MAY_READ, NULL);
+	err = permission(dentry->d_inode, MAY_READ, NULL, NULL);
 	if (err)
 		goto out_err_put;
 
@@ -873,7 +873,7 @@ static void ftp_chdir (tux_req_t *req, i
 		}
 		goto out_err;
 	}
-	err = permission(dentry->d_inode, MAY_EXEC, NULL);
+	err = permission(dentry->d_inode, MAY_EXEC, NULL, NULL);
 	if (err)
 		goto out_err_put;
 	req->cwd_dentry = dentry;
@@ -1123,7 +1123,7 @@ static void ftp_lookup_listfile (tux_req
 	}
 
 	if (S_ISDIR(dentry->d_inode->i_mode)) {
-		err = permission(dentry->d_inode, MAY_EXEC, NULL);
+		err = permission(dentry->d_inode, MAY_EXEC, NULL, NULL);
 		if (err) {
 			Dprintk("Directory permission error: %d.\n", err);
 			goto out_err_put;
diff -Nurap linux-2.6.9-100.orig/net/unix/af_unix.c linux-2.6.9-ve023stab054/net/unix/af_unix.c
--- linux-2.6.9-100.orig/net/unix/af_unix.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/net/unix/af_unix.c	2011-06-15 19:26:21.000000000 +0400
@@ -119,6 +119,9 @@
 #include <net/checksum.h>
 #include <linux/security.h>
 
+#include <ub/ub_net.h>
+#include <ub/beancounter.h>
+
 int sysctl_unix_max_dgram_qlen = 10;
 
 kmem_cache_t *unix_sk_cachep;
@@ -242,6 +245,8 @@ static struct sock *__unix_find_socket_b
 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 		struct unix_sock *u = unix_sk(s);
 
+		if (!ve_accessible(VE_OWNER_SK(s), get_exec_env()))
+			continue;
 		if (u->addr->len == len &&
 		    !memcmp(u->addr->name, sunname, len))
 			goto found;
@@ -446,7 +451,7 @@ static int unix_listen(struct socket *so
 	sk->sk_max_ack_backlog	= backlog;
 	sk->sk_state		= TCP_LISTEN;
 	/* set credentials so connect can copy them */
-	sk->sk_peercred.pid	= current->tgid;
+	sk->sk_peercred.pid	= virt_tgid(current);
 	sk->sk_peercred.uid	= current->euid;
 	sk->sk_peercred.gid	= current->egid;
 	err = 0;
@@ -555,6 +560,8 @@ static struct sock * unix_create1(struct
 		      unix_sk_cachep);
 	if (!sk)
 		goto out;
+	if (ub_other_sock_charge(sk))
+		goto out_sk_free;
 
 	atomic_inc(&unix_nr_socks);
 
@@ -575,6 +582,9 @@ static struct sock * unix_create1(struct
 	unix_insert_socket(unix_sockets_unbound, sk);
 out:
 	return sk;
+out_sk_free:
+	sk_free(sk);
+	return NULL;
 }
 
 static int unix_create(struct socket *sock, int protocol)
@@ -680,7 +690,7 @@ static struct sock *unix_find_other(stru
 		err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
 		if (err)
 			goto fail;
-		err = permission(nd.dentry->d_inode,MAY_WRITE, &nd);
+		err = permission(nd.dentry->d_inode, MAY_WRITE, &nd, NULL);
 		if (err)
 			goto put_fail;
 
@@ -958,6 +968,7 @@ static int unix_stream_connect(struct so
 	int st;
 	int err;
 	long timeo;
+	unsigned long chargesize;
 
 	err = unix_mkname(sunaddr, addr_len, &hash);
 	if (err < 0)
@@ -985,6 +996,10 @@ static int unix_stream_connect(struct so
 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
 	if (skb == NULL)
 		goto out;
+	chargesize = skb_charge_fullsize(skb);
+	if (ub_sock_getwres_other(newsk, chargesize) < 0)
+		goto out;	
+	ub_skb_set_charge(skb, newsk, chargesize, UB_OTHERSOCKBUF);
 
 restart:
 	/*  Find listening sock. */
@@ -1070,7 +1085,7 @@ restart:
 	unix_peer(newsk)	= sk;
 	newsk->sk_state		= TCP_ESTABLISHED;
 	newsk->sk_type		= sk->sk_type;
-	newsk->sk_peercred.pid	= current->tgid;
+	newsk->sk_peercred.pid	= virt_tgid(current);
 	newsk->sk_peercred.uid	= current->euid;
 	newsk->sk_peercred.gid	= current->egid;
 	newu = unix_sk(newsk);
@@ -1129,7 +1144,7 @@ static int unix_socketpair(struct socket
 	sock_hold(skb);
 	unix_peer(ska)=skb;
 	unix_peer(skb)=ska;
-	ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
+	ska->sk_peercred.pid = skb->sk_peercred.pid = virt_tgid(current);
 	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
 	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
 
@@ -1233,7 +1248,7 @@ static void unix_detach_fds(struct scm_c
 		unix_notinflight(scm->fp->fp[i]);
 }
 
-static void unix_destruct_fds(struct sk_buff *skb)
+void unix_destruct_fds(struct sk_buff *skb)
 {
 	struct scm_cookie scm;
 	memset(&scm, 0, sizeof(scm));
@@ -1244,6 +1259,7 @@ static void unix_destruct_fds(struct sk_
 	scm_destroy(&scm);
 	sock_wfree(skb);
 }
+EXPORT_SYMBOL_GPL(unix_destruct_fds);
 
 #define MAX_RECURSION_LEVEL 4
 extern struct sock * unix_get_socket(struct file *filp);
@@ -1494,6 +1510,16 @@ static int unix_stream_sendmsg(struct ki
 
 		size=len-sent;
 
+		if (msg->msg_flags & MSG_DONTWAIT)
+			ub_sock_makewres_other(sk, skb_charge_size(size));
+		if (sock_bc(sk) != NULL && 
+				sock_bc(sk)->poll_reserv >= 
+					SOCK_MIN_UBCSPACE &&
+				skb_charge_size(size) >
+					sock_bc(sk)->poll_reserv)
+			size = skb_charge_datalen(sock_bc(sk)->poll_reserv);
+				
+
 		/* Keep two messages in the pipe so it schedules better */
 		if (size > sk->sk_sndbuf / 2 - 64)
 			size = sk->sk_sndbuf / 2 - 64;
@@ -1505,7 +1531,8 @@ static int unix_stream_sendmsg(struct ki
 		 *	Grab a buffer
 		 */
 		 
-		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
+		skb = sock_alloc_send_skb2(sk, size, SOCK_MIN_UBCSPACE,
+				msg->msg_flags&MSG_DONTWAIT, &err);
 
 		if (skb==NULL)
 			goto out_err;
@@ -1942,6 +1969,7 @@ static unsigned int unix_poll(struct fil
 {
 	struct sock *sk = sock->sk;
 	unsigned int mask;
+	int no_ub_res;
 
 	poll_wait(file, sk->sk_sleep, wait);
 	mask = 0;
@@ -1952,6 +1980,10 @@ static unsigned int unix_poll(struct fil
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
 		mask |= POLLHUP;
 
+	no_ub_res = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
+	if (no_ub_res)
+		ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
+
 	/* readable? */
 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
 	    (sk->sk_shutdown & RCV_SHUTDOWN))
@@ -1965,7 +1997,7 @@ static unsigned int unix_poll(struct fil
 	 * we set writable also when the other side has shut down the
 	 * connection. This prevents stuck sockets.
 	 */
-	if (unix_writable(sk))
+	if (!no_ub_res && unix_writable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 
 	return mask;
diff -Nurap linux-2.6.9-100.orig/scripts/kconfig/mconf.c linux-2.6.9-ve023stab054/scripts/kconfig/mconf.c
--- linux-2.6.9-100.orig/scripts/kconfig/mconf.c	2004-10-19 01:53:43.000000000 +0400
+++ linux-2.6.9-ve023stab054/scripts/kconfig/mconf.c	2011-06-15 19:26:18.000000000 +0400
@@ -89,7 +89,7 @@ static char *args[1024], **argptr = args
 static int indent;
 static struct termios ios_org;
 static int rows = 0, cols = 0;
-static struct menu *current_menu;
+struct menu *current_menu;
 static int child_count;
 static int do_resize;
 static int single_menu_mode;
diff -Nurap linux-2.6.9-100.orig/security/commoncap.c linux-2.6.9-ve023stab054/security/commoncap.c
--- linux-2.6.9-100.orig/security/commoncap.c	2011-06-09 19:23:07.000000000 +0400
+++ linux-2.6.9-ve023stab054/security/commoncap.c	2011-06-15 19:26:21.000000000 +0400
@@ -17,6 +17,7 @@
 #include <linux/mman.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
+#include <linux/virtinfo.h>
 #include <linux/smp_lock.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
@@ -174,7 +175,7 @@ int cap_inode_setxattr(struct dentry *de
 {
 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
 		     sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -183,7 +184,7 @@ int cap_inode_removexattr(struct dentry 
 {
 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
 		     sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -289,10 +290,10 @@ void cap_task_reparent_to_init (struct t
 
 int cap_syslog (int type)
 {
-	if (dmesg_restrict && !capable(CAP_SYS_ADMIN))
+	if (dmesg_restrict && !capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 
-	if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
+	if ((type != 3 && type != 10) && !capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -314,6 +315,18 @@ int cap_vm_enough_memory(long pages)
 
 	vm_acct_memory(pages);
 
+#ifdef CONFIG_USER_RESOURCE
+	switch (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_ENOUGHMEM,
+				(void *)pages)
+			& (NOTIFY_OK | NOTIFY_FAIL)) {
+		case NOTIFY_OK:
+			return 0;
+		case NOTIFY_FAIL:
+			vm_unacct_memory(pages);
+			return -ENOMEM;
+	}
+#endif
+
 	/*
 	 * Sometimes we want to use more memory than we have
 	 */
--- ./arch/i386/Kconfig.core2	2011-06-15 17:25:15.000000000 +0400
+++ ./arch/i386/Kconfig	2011-06-15 17:25:25.000000000 +0400
@@ -504,8 +504,9 @@ config HPET_TIMER
 	  Choose N to continue using the legacy 8254 timer.
 
 config HPET_EMULATE_RTC
-	bool "Provide RTC interrupt"
+	bool
 	depends on HPET_TIMER && RTC=y
+	default y
 
 config TICK_DIVIDER
 	bool "Support clock division"
@@ -557,6 +558,28 @@ config NR_CPUS
 	  This is purely to save memory - each supported CPU adds
 	  approximately eight kilobytes to the kernel image.
 
+config FAIRSCHED
+	bool "Fair CPU scheduler (EXPERIMENTAL)"
+	default y
+	help
+	  Config option for Fair CPU scheduler (fairsched).
+	  This option allows to group processes to scheduling nodes
+	  which receive CPU proportional to their weight.
+	  This is very important feature for process groups isolation and
+	  QoS management.
+
+	  If unsure, say N.
+
+config SCHED_VCPU
+	bool "VCPU scheduler support"
+	depends on SMP || FAIRSCHED
+	default FAIRSCHED
+	help
+	  VCPU scheduler support adds additional layer of abstraction
+	  which allows to virtualize cpu notion and split physical cpus
+	  and virtual cpus. This support allows to use CPU fair scheduler,
+	  dynamically add/remove cpus to/from VPS and so on.
+
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
 	depends on SMP && !X86_XEN
@@ -1308,6 +1331,10 @@ endif
 
 source "arch/i386/Kconfig.debug"
 
+menu "OpenVZ"
+source "kernel/Kconfig.openvz"
+endmenu
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
@@ -1320,6 +1347,8 @@ endif
 
 source "lib/Kconfig"
 
+source "kernel/ub/Kconfig"
+
 config X86_SMP
 	bool
 	depends on SMP && !X86_VOYAGER
--- ./arch/ia64/Kconfig.core2	2011-06-15 17:25:15.000000000 +0400
+++ ./arch/ia64/Kconfig	2011-06-15 17:25:25.000000000 +0400
@@ -275,6 +275,28 @@ config PREEMPT
           Say Y here if you are building a kernel for a desktop, embedded
           or real-time system.  Say N if you are unsure.
 
+config FAIRSCHED
+	bool "Fair CPU scheduler (EXPERIMENTAL)"
+	default y
+	help
+	  Config option for the Fair CPU scheduler (fairsched).
+	  This option allows grouping processes into scheduling nodes
+	  which receive CPU time proportional to their weight.
+	  This is a very important feature for process group isolation and
+	  QoS management.
+
+	  If unsure, say N.
+
+config SCHED_VCPU
+	bool "VCPU scheduler support"
+	depends on SMP || FAIRSCHED
+	default FAIRSCHED
+	help
+	  VCPU scheduler support adds an additional layer of abstraction
+	  which allows virtualizing the notion of a CPU and splitting
+	  physical and virtual CPUs. This support allows using the fair
+	  CPU scheduler and dynamically adding/removing CPUs to/from a VPS.
+
 config HAVE_DEC_LOCK
 	bool
 	depends on (SMP || PREEMPT)
@@ -401,6 +423,12 @@ source "arch/ia64/oprofile/Kconfig"
 
 source "arch/ia64/Kconfig.debug"
 
+menu "OpenVZ"
+source "kernel/Kconfig.openvz"
+endmenu
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
+
+source "kernel/ub/Kconfig"
--- ./arch/x86_64/Kconfig.core2	2011-06-15 17:25:15.000000000 +0400
+++ ./arch/x86_64/Kconfig	2011-06-15 17:25:25.000000000 +0400
@@ -307,6 +307,28 @@ config PREEMPT
 	  Say Y here if you are feeling brave and building a kernel for a
 	  desktop, embedded or real-time system.  Say N if you are unsure.
 
+config FAIRSCHED
+	bool "Fair CPU scheduler (EXPERIMENTAL)"
+	default y
+	help
+	  Config option for the Fair CPU scheduler (fairsched).
+	  This option allows grouping processes into scheduling nodes
+	  which receive CPU time proportional to their weight.
+	  This is a very important feature for process group isolation and
+	  QoS management.
+
+	  If unsure, say N.
+
+config SCHED_VCPU
+	bool "VCPU scheduler support"
+	depends on SMP || FAIRSCHED
+	default FAIRSCHED
+	help
+	  VCPU scheduler support adds an additional layer of abstraction
+	  which allows virtualizing the notion of a CPU and splitting
+	  physical and virtual CPUs. This support allows using the fair
+	  CPU scheduler and dynamically adding/removing CPUs to/from a VPS.
+
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
 	depends on SMP && !X86_64_XEN
@@ -512,6 +534,10 @@ source "arch/x86_64/oprofile/Kconfig"
 
 source "arch/x86_64/Kconfig.debug"
 
+menu "OpenVZ"
+source "kernel/Kconfig.openvz"
+endmenu
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
@@ -523,3 +549,5 @@ source "drivers/xenpv_hvm/Kconfig"
 endif
 
 source "lib/Kconfig"
+
+source "kernel/ub/Kconfig"
--- ./drivers/net/Makefile.core2	2011-06-15 17:25:15.000000000 +0400
+++ ./drivers/net/Makefile	2011-06-15 17:25:25.000000000 +0400
@@ -17,6 +17,12 @@ obj-$(CONFIG_GIANFAR) += gianfar_driver.
 
 gianfar_driver-objs := gianfar.o gianfar_ethtool.o gianfar_phy.o
 
+obj-$(CONFIG_VE_NETDEV) += vznetdev.o
+vznetdev-objs := open_vznet.o venet_core.o
+
+obj-$(CONFIG_VE_ETHDEV) += vzethdev.o
+vzethdev-objs := veth.o
+
 #
 # link order important here
 #
--- ./fs/Kconfig.core2	2011-06-15 17:25:15.000000000 +0400
+++ ./fs/Kconfig	2011-06-15 17:25:25.000000000 +0400
@@ -417,6 +417,15 @@ config QUOTA
 	  with the quota tools. Probably the quota support is only useful for
 	  multi user systems. If unsure, say N.
 
+config QUOTA_COMPAT
+	bool "Compatibility with older quotactl interface"
+	depends on QUOTA
+	help
+	  This option enables a compatibility layer for an older version
+	  of the quotactl interface with byte granularity (QUOTAON at
+	  0x0100, GETQUOTA at 0x0D00).  Interface versions older than that
+	  one and with block granularity are still not supported.
+
 config QFMT_V1
 	tristate "Old quota format support"
 	depends on QUOTA
@@ -433,6 +442,38 @@ config QFMT_V2
 	  need this functionality say Y here. Note that you will need recent
 	  quota utilities (>= 3.01) for new quota format with this kernel.
 
+config SIM_FS
+	tristate "VPS filesystem"
+	depends on VZ_QUOTA
+	default m
+	help
+	  This file system is a part of Virtuozzo. It introduces a fake
+	  superblock and block device to a VE to hide the real device and
+	  show statfs results taken from quota.
+
+config VZ_QUOTA
+	tristate "Virtuozzo Disk Quota support"
+	depends on QUOTA
+	default m
+	help
+	  Virtuozzo Disk Quota imposes a disk quota on directories, with
+	  their files and subdirectories counted in total.  Such a disk
+	  quota is used to account and limit disk usage by a Virtuozzo
+	  VPS, but it may also be used separately.
+
+config VZ_QUOTA_UNLOAD
+	bool "Unloadable Virtuozzo Disk Quota module"
+	depends on VZ_QUOTA=m
+	default n
+	help
+	  Make the Virtuozzo Disk Quota module unloadable.
+	  This does not work reliably at present.
+
+config VZ_QUOTA_UGID
+	bool "Per-user and per-group quota in Virtuozzo quota partitions"
+	depends on VZ_QUOTA!=n
+	default y
+
 config QUOTACTL
 	bool
 	depends on XFS_QUOTA || QUOTA
--- ./fs/Makefile.core2	2011-06-15 17:25:15.000000000 +0400
+++ ./fs/Makefile	2011-06-15 17:25:25.000000000 +0400
@@ -37,6 +37,12 @@ obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
 obj-$(CONFIG_QFMT_V2)		+= quota_v2.o
 obj-$(CONFIG_QUOTACTL)		+= quota.o
+obj-$(CONFIG_VZ_QUOTA)		+= vzdquota.o
+vzdquota-y			+= vzdquot.o vzdq_mgmt.o vzdq_ops.o vzdq_tree.o
+vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_ugid.o
+vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_file.o
+
+obj-$(CONFIG_SIM_FS)		+= simfs.o
 
 obj-$(CONFIG_PROC_FS)		+= proc/
 obj-y				+= partitions/
--- ./kernel/Makefile.core2	2011-06-15 17:25:15.000000000 +0400
+++ ./kernel/Makefile	2011-06-15 17:27:08.000000000 +0400
@@ -2,12 +2,23 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+obj-y     = sched.o fairsched.o \
+	    fork.o exec_domain.o panic.o printk.o profile.o \
 	    exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o intermodule.o extable.o params.o posix-timers.o \
-	    kthread.o dump.o kfifo.o
+	    kthread.o ksysfs.o dump.o kfifo.o
+
+obj-$(CONFIG_VE) += ve.o
+obj-y += ub/
+obj-y += veowner.o
+obj-$(CONFIG_VE_CALLS) += vzdev.o
+obj-$(CONFIG_VZ_WDOG) += vzwdog.o
+obj-$(CONFIG_VZ_CHECKPOINT) += cpt/
+obj-$(CONFIG_VE_CALLS) += vzmon.o
+vzmon-objs = vecalls.o
+obj-$(CONFIG_VZ_EVENT) += vzevent.o
 
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
