diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h
index 3f9ce4d8061420fc707ceaa63c5100f5d7306299..49186e4aa80e0ed99b173d810ed5e5169d73b7d1 100644
--- a/lustre/include/lprocfs_status.h
+++ b/lustre/include/lprocfs_status.h
@@ -136,10 +136,20 @@ struct lprocfs_percpu {
         struct lprocfs_counter lp_cntr[0];
 };
 
+#define LPROCFS_GET_NUM_CPU 0x0001 /* lprocfs_stats_lock: return area count */
+#define LPROCFS_GET_SMP_ID  0x0002 /* lprocfs_stats_lock: return area index */
+
+enum lprocfs_stats_flags {
+        LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* one shared area instead of
+                                               * percpu ones; uses ls_lock */
+};
 
 struct lprocfs_stats {
         unsigned int           ls_num;     /* # of counters */
         unsigned int           ls_percpu_size;
+        int                    ls_flags; /* enum lprocfs_stats_flags bits */
+        spinlock_t             ls_lock;  /* Taken by lprocfs_stats_lock()
+                                          * only without percpu areas */
         struct lprocfs_percpu *ls_percpu[0];
 };
 
@@ -170,6 +180,31 @@ static inline void s2dhms(struct dhms *ts, time_t secs)
 
 #ifdef LPROCFS
 
+/* Begin access to @stats.  Without percpu areas this takes ls_lock.
+ * Returns the area index for LPROCFS_GET_SMP_ID (which wins if both bits
+ * are set), the area count for LPROCFS_GET_NUM_CPU, or 0 otherwise. */
+static inline int lprocfs_stats_lock(struct lprocfs_stats *stats, int type)
+{
+        int want_id  = type & LPROCFS_GET_SMP_ID;
+        int want_num = type & LPROCFS_GET_NUM_CPU;
+
+        if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+                spin_lock(&stats->ls_lock);
+                return (!want_id && want_num) ? 1 : 0;
+        }
+        if (want_id)
+                return smp_processor_id();
+        if (want_num)
+                return num_possible_cpus();
+        return 0;
+}
+
+static inline void lprocfs_stats_unlock(struct lprocfs_stats *stats)
+{
+        if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
+                spin_unlock(&stats->ls_lock); /* taken by lprocfs_stats_lock */
+}
+
 /* Two optimized LPROCFS counter increment functions are provided:
  *     lprocfs_counter_incr(cntr, value) - optimized for by-one counters
  *     lprocfs_counter_add(cntr) - use for multi-valued counters
@@ -181,10 +216,16 @@ static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
                                        long amount)
 {
         struct lprocfs_counter *percpu_cntr;
+        int smp_id;
 
         if (!stats)
                 return;
-        percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]);
+
+        /* With LPROCFS_STATS_FLAG_NOPERCPU (used for per-client stats) only
+         * a single area is allocated, so smp_id is always 0 in that case. */
+        smp_id = lprocfs_stats_lock(stats, LPROCFS_GET_SMP_ID);
+
+        percpu_cntr = &(stats->ls_percpu[smp_id]->lp_cntr[idx]);
         atomic_inc(&percpu_cntr->lc_cntl.la_entry);
         percpu_cntr->lc_count++;
 
@@ -198,21 +239,29 @@ static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
                         percpu_cntr->lc_max = amount;
         }
         atomic_inc(&percpu_cntr->lc_cntl.la_exit);
+        lprocfs_stats_unlock(stats);
 }
 
 static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx)
 {
         struct lprocfs_counter *percpu_cntr;
+        int smp_id; /* index of the stats area to update */
 
         if (!stats)
                 return;
-        percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]);
+
+        smp_id = lprocfs_stats_lock(stats, LPROCFS_GET_SMP_ID);
+
+        percpu_cntr = &(stats->ls_percpu[smp_id]->lp_cntr[idx]);
         atomic_inc(&percpu_cntr->lc_cntl.la_entry);
         percpu_cntr->lc_count++;
         atomic_inc(&percpu_cntr->lc_cntl.la_exit);
+
+        lprocfs_stats_unlock(stats); /* no-op unless ls_lock was taken */
 }
 
-extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num);
+extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
+                                                 enum lprocfs_stats_flags flags);
 extern void lprocfs_clear_stats(struct lprocfs_stats *stats);
 extern void lprocfs_free_stats(struct lprocfs_stats **stats);
 extern void lprocfs_init_ops_stats(int num_private_stats, 
@@ -421,7 +470,8 @@ static inline void lprocfs_counter_init(struct lprocfs_stats *stats,
                                         const char *name, const char *units)
 { return; }
 
-static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num)
+static inline struct lprocfs_stats * /* stub variant: always NULL */
+lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags)
 { return NULL; }
 static inline void lprocfs_clear_stats(struct lprocfs_stats *stats)
 { return; }
diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c
index 8900a74c34733474d3b562b370ec6903cf1dabbe..139c20535e00d433933cdde1aa3f353b78998f61 100644
--- a/lustre/ldlm/ldlm_pool.c
+++ b/lustre/ldlm/ldlm_pool.c
@@ -486,7 +486,7 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl)
         lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0);
 
         pl->pl_stats = lprocfs_alloc_stats(LDLM_POOL_LAST_STAT -
-                                           LDLM_POOL_GRANTED_STAT);
+                                           LDLM_POOL_GRANTED_STAT, 0);
         if (!pl->pl_stats)
                 GOTO(out_free_name, rc = -ENOMEM);
 
diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c
index 81dade0b126f722b6a150a71a11cd49479f332b5..5f8d84c0c924084b651e48e7ecd0eef2e1ab31c6 100644
--- a/lustre/llite/lproc_llite.c
+++ b/lustre/llite/lproc_llite.c
@@ -687,7 +687,7 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
         entry->data = sbi;
 
         /* File operations stats */
-        sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES);
+        sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES, 0);
         if (sbi->ll_stats == NULL)
                 GOTO(out, err = -ENOMEM);
         /* do counter init */
diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c
index cacbff274e274637f6a4e61daab61ceba788a551..e55621d7667686ce4f6ff55a3cfbc57323391d49 100644
--- a/lustre/mds/mds_fs.c
+++ b/lustre/mds/mds_fs.c
@@ -59,7 +59,8 @@ static int mds_export_stats_init(struct obd_device *obd, struct obd_export *exp)
                 return rc;
         num_stats = (sizeof(*obd->obd_type->typ_ops) / sizeof(void *)) +
                      LPROC_MDS_LAST - 1;
-        exp->exp_ops_stats = lprocfs_alloc_stats(num_stats);
+        exp->exp_ops_stats = lprocfs_alloc_stats(num_stats,
+                                                 LPROCFS_STATS_FLAG_NOPERCPU);
         if (exp->exp_ops_stats == NULL)
                 return -ENOMEM;
         lprocfs_init_ops_stats(LPROC_MDS_LAST, exp->exp_ops_stats);
diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c
index 77d405a6aba5cd44834115be197b926be0de4585..3b0e212a1d6d7962006b31d9719e23ded4b26bd9 100644
--- a/lustre/obdclass/lprocfs_status.c
+++ b/lustre/obdclass/lprocfs_status.c
@@ -742,31 +742,49 @@ int lprocfs_obd_cleanup(struct obd_device *obd)
         return 0;
 }
 
-struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num)
+struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
+                                          enum lprocfs_stats_flags flags)
 {
         struct lprocfs_stats *stats;
         struct lprocfs_percpu *percpu;
         unsigned int percpusize;
         unsigned int i;
+        unsigned int num_cpu;
 
         if (num == 0)
                 return NULL;
 
-        OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_online_cpus()]));
+        if (flags & LPROCFS_STATS_FLAG_NOPERCPU)
+                num_cpu = 1;
+        else
+                num_cpu = num_possible_cpus();
+
+        OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_cpu]));
         if (stats == NULL)
                 return NULL;
 
-        percpusize = L1_CACHE_ALIGN(offsetof(typeof(*percpu), lp_cntr[num]));
-        stats->ls_percpu_size = num_online_cpus() * percpusize;
+        if (flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+                stats->ls_flags = flags;
+                spin_lock_init(&stats->ls_lock);
+                /* Use this lock only if there are no percpu areas */
+        } else {
+                stats->ls_flags = 0;
+        }
+
+        percpusize = offsetof(typeof(*percpu), lp_cntr[num]);
+        if (num_cpu > 1)
+                percpusize = L1_CACHE_ALIGN(percpusize);
+
+        stats->ls_percpu_size = num_cpu * percpusize;
         OBD_ALLOC(stats->ls_percpu[0], stats->ls_percpu_size);
         if (stats->ls_percpu[0] == NULL) {
                 OBD_FREE(stats, offsetof(typeof(*stats),
-                                         ls_percpu[num_online_cpus()]));
+                                         ls_percpu[num_cpu]));
                 return NULL;
         }
 
         stats->ls_num = num;
-        for (i = 1; i < num_online_cpus(); i++)
+        for (i = 1; i < num_cpu; i++)
                 stats->ls_percpu[i] = (void *)(stats->ls_percpu[i - 1]) +
                         percpusize;
 
@@ -776,21 +794,29 @@ struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num)
 void lprocfs_free_stats(struct lprocfs_stats **statsh)
 {
         struct lprocfs_stats *stats = *statsh;
+        unsigned int nr_areas;
         
         if (!stats || (stats->ls_num == 0))
                 return;
         *statsh = NULL;
+
+        /* Same area count as lprocfs_alloc_stats() used for the header. */
+        nr_areas = (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) ?
+                   1 : num_possible_cpus();
 
         OBD_FREE(stats->ls_percpu[0], stats->ls_percpu_size);
-        OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_online_cpus()]));
+        OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[nr_areas]));
 }
 
 void lprocfs_clear_stats(struct lprocfs_stats *stats)
 {
         struct lprocfs_counter *percpu_cntr;
-        int i,j;
+        int i, j;
+        unsigned int num_cpu;
+
+        num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU);
 
-        for (i = 0; i < num_online_cpus(); i++) {
+        for (i = 0; i < num_cpu; i++) {
                 for (j = 0; j < stats->ls_num; j++) {        
                         percpu_cntr = &(stats->ls_percpu[i])->lp_cntr[j];
                         atomic_inc(&percpu_cntr->lc_cntl.la_entry);
@@ -802,6 +828,8 @@ void lprocfs_clear_stats(struct lprocfs_stats *stats)
                         atomic_inc(&percpu_cntr->lc_cntl.la_exit);
                 }
         }
+
+        lprocfs_stats_unlock(stats);
 }
 
 static ssize_t lprocfs_stats_seq_write(struct file *file, const char *buf,
@@ -842,6 +870,7 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
        struct lprocfs_counter  *cntr = v;
        struct lprocfs_counter  t, ret = { .lc_min = ~(__u64)0 };
        int i, idx, rc = 0;
+       unsigned int num_cpu;
 
        if (cntr == &(stats->ls_percpu[0])->lp_cntr[0]) {
                struct timeval now;
@@ -853,7 +882,12 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
        }
        idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0];
 
-       for (i = 0; i < num_online_cpus(); i++) {
+       if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
+               num_cpu = 1;
+       else
+               num_cpu = num_possible_cpus();
+
+       for (i = 0; i < num_cpu; i++) {
                struct lprocfs_counter *percpu_cntr =
                        &(stats->ls_percpu[i])->lp_cntr[idx];
                int centry;
@@ -952,9 +986,13 @@ void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
 {
         struct lprocfs_counter *c;
         int i;
+        unsigned int num_cpu;
 
         LASSERT(stats != NULL);
-        for (i = 0; i < num_online_cpus(); i++) {
+
+        num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU);
+
+        for (i = 0; i < num_cpu; i++) {
                 c = &(stats->ls_percpu[i]->lp_cntr[index]);
                 c->lc_config = conf;
                 c->lc_count = 0;
@@ -964,6 +1002,8 @@ void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
                 c->lc_name = name;
                 c->lc_units = units;
         }
+
+        lprocfs_stats_unlock(stats);
 }
 EXPORT_SYMBOL(lprocfs_counter_init);
 
@@ -1054,7 +1094,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
 
         num_stats = ((int)sizeof(*obd->obd_type->typ_ops) / sizeof(void *)) +
                 num_private_stats - 1 /* o_owner */;
-        stats = lprocfs_alloc_stats(num_stats);
+        stats = lprocfs_alloc_stats(num_stats, 0);
         if (stats == NULL)
                 return -ENOMEM;
 
@@ -1144,7 +1184,7 @@ int lprocfs_exp_setup(struct obd_export *exp)
 
         /* Always add ldlm stats */
         exp->exp_ldlm_stats = lprocfs_alloc_stats(LDLM_LAST_OPC 
-                                                  - LDLM_FIRST_OPC);
+                                                  - LDLM_FIRST_OPC, 0);
         if (exp->exp_ldlm_stats == NULL) {
                 lprocfs_remove(&exp->exp_proc);
                 GOTO(out, rc = -ENOMEM);
diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c
index ec3725142bc3c276a80b99065c0d85fad5468b20..3ef1477c527d5cdcc50b55128f6ef6b4a381171e 100644
--- a/lustre/obdfilter/filter.c
+++ b/lustre/obdfilter/filter.c
@@ -188,7 +188,8 @@ static int filter_export_stats_init(struct obd_device *obd,
         /* Create a per export proc entry for ops stats */
         num_stats = (sizeof(*obd->obd_type->typ_ops) / sizeof(void *)) +
                      LPROC_FILTER_LAST - 1;
-        exp->exp_ops_stats = lprocfs_alloc_stats(num_stats);
+        exp->exp_ops_stats = lprocfs_alloc_stats(num_stats,
+                                                 LPROCFS_STATS_FLAG_NOPERCPU);
         if (exp->exp_ops_stats == NULL)
               RETURN(-ENOMEM);
         lprocfs_init_ops_stats(LPROC_FILTER_LAST, exp->exp_ops_stats);
diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c
index 15a7da27ba4cdb2761340f597b39e05408ae3ec1..79d68444208c49db1c09d4515270c925dcaa8e20 100644
--- a/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/lustre/ptlrpc/lproc_ptlrpc.c
@@ -133,7 +133,7 @@ void ptlrpc_lprocfs_register(struct proc_dir_entry *root, char *dir,
         LASSERT(*procroot_ret == NULL);
         LASSERT(*stats_ret == NULL);
 
-        svc_stats = lprocfs_alloc_stats(PTLRPC_LAST_CNTR + LUSTRE_MAX_OPCODES);
+        svc_stats = lprocfs_alloc_stats(PTLRPC_LAST_CNTR + LUSTRE_MAX_OPCODES, 0);
         if (svc_stats == NULL)
                 return;