summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin H. Johnson <robbat2@gentoo.org>2010-08-09 00:21:00 +0000
committerRobin H. Johnson <robbat2@gentoo.org>2010-08-09 00:21:00 +0000
commit4b509e569a5d958c4a81e18dedd3df31a6092391 (patch)
tree18750b10edc3f28dc63d67ed9549ec1fb53bdd04 /percona/5.0.91-b22-20100522/innodb_rw_lock.patch
parentUpdated 07110 patch for mysql-5.1.49. (diff)
downloadmysql-extras-4b509e569a5d958c4a81e18dedd3df31a6092391.tar.gz
mysql-extras-4b509e569a5d958c4a81e18dedd3df31a6092391.tar.bz2
mysql-extras-4b509e569a5d958c4a81e18dedd3df31a6092391.zip
Adding latest Percona patches.
Diffstat (limited to 'percona/5.0.91-b22-20100522/innodb_rw_lock.patch')
-rw-r--r--percona/5.0.91-b22-20100522/innodb_rw_lock.patch2480
1 files changed, 2480 insertions, 0 deletions
diff --git a/percona/5.0.91-b22-20100522/innodb_rw_lock.patch b/percona/5.0.91-b22-20100522/innodb_rw_lock.patch
new file mode 100644
index 0000000..a509f70
--- /dev/null
+++ b/percona/5.0.91-b22-20100522/innodb_rw_lock.patch
@@ -0,0 +1,2480 @@
+diff -ruN a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c
+--- a/innobase/btr/btr0cur.c 2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/btr/btr0cur.c 2009-10-22 15:18:44.000000000 +0900
+@@ -313,7 +313,7 @@
+ #ifdef UNIV_SEARCH_PERF_STAT
+ info->n_searches++;
+ #endif
+- if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED
++ if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
+ && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
+ && !estimate
+ #ifdef PAGE_CUR_LE_OR_EXTENDS
+diff -ruN a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c
+--- a/innobase/btr/btr0sea.c 2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/btr/btr0sea.c 2009-10-22 15:18:44.000000000 +0900
+@@ -773,8 +773,8 @@
+ rw_lock_s_lock(&btr_search_latch);
+ }
+
+- ut_ad(btr_search_latch.writer != RW_LOCK_EX);
+- ut_ad(btr_search_latch.reader_count > 0);
++ ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX);
++ ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0);
+
+ rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);
+
+diff -ruN a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c
+--- a/innobase/buf/buf0buf.c 2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/buf/buf0buf.c 2009-10-22 15:18:44.000000000 +0900
+@@ -1292,7 +1292,7 @@
+
+ if (mode == BUF_GET_NOWAIT) {
+ if (rw_latch == RW_S_LATCH) {
+- success = rw_lock_s_lock_func_nowait(&(block->lock),
++ success = rw_lock_s_lock_nowait(&(block->lock),
+ file, line);
+ fix_type = MTR_MEMO_PAGE_S_FIX;
+ } else {
+@@ -1442,7 +1442,7 @@
+ ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset));
+
+ if (rw_latch == RW_S_LATCH) {
+- success = rw_lock_s_lock_func_nowait(&(block->lock),
++ success = rw_lock_s_lock_nowait(&(block->lock),
+ file, line);
+ fix_type = MTR_MEMO_PAGE_S_FIX;
+ } else {
+@@ -1596,7 +1596,7 @@
+ ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
+
+ if (rw_latch == RW_S_LATCH) {
+- success = rw_lock_s_lock_func_nowait(&(block->lock),
++ success = rw_lock_s_lock_nowait(&(block->lock),
+ file, line);
+ fix_type = MTR_MEMO_PAGE_S_FIX;
+ } else {
+diff -ruN a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic
+--- a/innobase/include/buf0buf.ic 2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/include/buf0buf.ic 2009-10-22 16:12:25.000000000 +0900
+@@ -523,7 +523,7 @@
+ #ifdef UNIV_SYNC_DEBUG
+ ibool ret;
+
+- ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
++ ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
+
+ ut_ad(ret == TRUE);
+ ut_ad(mutex_own(&block->mutex));
+diff -ruN a/innobase/include/os0sync.h b/innobase/include/os0sync.h
+--- a/innobase/include/os0sync.h 2009-09-10 04:02:59.000000000 +0900
++++ b/innobase/include/os0sync.h 2009-10-22 15:18:44.000000000 +0900
+@@ -1,11 +1,35 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ The interface to the operating system
+ synchronization primitives.
+
+-(c) 1995 Innobase Oy
+-
+ Created 9/6/1995 Heikki Tuuri
+ *******************************************************/
++
+ #ifndef os0sync_h
+ #define os0sync_h
+
+@@ -261,6 +285,23 @@
+ /*===============*/
+ os_fast_mutex_t* fast_mutex); /* in: mutex to free */
+
++#ifdef HAVE_ATOMIC_BUILTINS
++/**************************************************************
++Atomic compare-and-swap for InnoDB. Currently requires GCC atomic builtins.
++Returns true if swapped, ptr is pointer to target, old_val is value to
++compare to, new_val is the value to swap in. */
++#define os_compare_and_swap(ptr, old_val, new_val) \
++ __sync_bool_compare_and_swap(ptr, old_val, new_val)
++
++/**************************************************************
++Atomic increment for InnoDB. Currently requires GCC atomic builtins.
++Returns the resulting value, ptr is pointer to target, amount is the
++amount of increment. */
++#define os_atomic_increment(ptr, amount) \
++ __sync_add_and_fetch(ptr, amount)
++
++#endif /* HAVE_ATOMIC_BUILTINS */
++
+ #ifndef UNIV_NONINL
+ #include "os0sync.ic"
+ #endif
+diff -ruN a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h
+--- a/innobase/include/sync0rw.h 2009-09-10 04:02:59.000000000 +0900
++++ b/innobase/include/sync0rw.h 2009-10-22 15:18:44.000000000 +0900
+@@ -1,8 +1,31 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ The read-write lock (for threads, not for database transactions)
+
+-(c) 1995 Innobase Oy
+-
+ Created 9/11/1995 Heikki Tuuri
+ *******************************************************/
+
+@@ -24,6 +47,12 @@
+ #define RW_X_LATCH 2
+ #define RW_NO_LATCH 3
+
++/* We decrement lock_word by this amount for each x_lock. It is also the
++start value for the lock_word, meaning that it limits the maximum number
++of concurrent read locks before the rw_lock breaks. The current value of
++0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
++#define X_LOCK_DECR 0x00100000
++
+ typedef struct rw_lock_struct rw_lock_t;
+ #ifdef UNIV_SYNC_DEBUG
+ typedef struct rw_lock_debug_struct rw_lock_debug_t;
+@@ -47,14 +76,14 @@
+ there may be waiters for the event */
+ #endif /* UNIV_SYNC_DEBUG */
+
+-extern ulint rw_s_system_call_count;
+-extern ulint rw_s_spin_wait_count;
+-extern ulint rw_s_exit_count;
+-extern ulint rw_s_os_wait_count;
+-extern ulint rw_x_system_call_count;
+-extern ulint rw_x_spin_wait_count;
+-extern ulint rw_x_os_wait_count;
+-extern ulint rw_x_exit_count;
++extern ib_longlong rw_s_spin_wait_count;
++extern ib_longlong rw_s_spin_round_count;
++extern ib_longlong rw_s_exit_count;
++extern ib_longlong rw_s_os_wait_count;
++extern ib_longlong rw_x_spin_wait_count;
++extern ib_longlong rw_x_spin_round_count;
++extern ib_longlong rw_x_os_wait_count;
++extern ib_longlong rw_x_exit_count;
+
+ /**********************************************************************
+ Creates, or rather, initializes an rw-lock object in a specified memory
+@@ -116,8 +145,22 @@
+ NOTE! The following macros should be used in rw s-locking, not the
+ corresponding function. */
+
+-#define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(\
+- (M), __FILE__, __LINE__)
++#define rw_lock_s_lock_nowait(M, F, L) rw_lock_s_lock_low(\
++ (M), 0, (F), (L))
++/**********************************************************************
++Low-level function which tries to lock an rw-lock in s-mode. Performs no
++spinning. */
++UNIV_INLINE
++ibool
++rw_lock_s_lock_low(
++/*===============*/
++ /* out: TRUE if success */
++ rw_lock_t* lock, /* in: pointer to rw-lock */
++ ulint pass __attribute__((unused)),
++ /* in: pass value; != 0, if the lock will be
++ passed to another thread to unlock */
++ const char* file_name, /* in: file name where lock requested */
++ ulint line); /* in: line where requested */
+ /**********************************************************************
+ NOTE! Use the corresponding macro, not directly this function, except if
+ you supply the file name and line number. Lock an rw-lock in shared mode
+@@ -135,18 +178,6 @@
+ const char* file_name,/* in: file name where lock requested */
+ ulint line); /* in: line where requested */
+ /**********************************************************************
+-NOTE! Use the corresponding macro, not directly this function, except if
+-you supply the file name and line number. Lock an rw-lock in shared mode
+-for the current thread if the lock can be acquired immediately. */
+-UNIV_INLINE
+-ibool
+-rw_lock_s_lock_func_nowait(
+-/*=======================*/
+- /* out: TRUE if success */
+- rw_lock_t* lock, /* in: pointer to rw-lock */
+- const char* file_name,/* in: file name where lock requested */
+- ulint line); /* in: line where requested */
+-/**********************************************************************
+ NOTE! Use the corresponding macro, not directly this function! Lock an
+ rw-lock in exclusive mode for the current thread if the lock can be
+ obtained immediately. */
+@@ -338,6 +369,41 @@
+ rw_lock_get_reader_count(
+ /*=====================*/
+ rw_lock_t* lock);
++/**********************************************************************
++Decrements lock_word by the specified amount if lock_word is greater than 0.
++This is used by both s_lock and x_lock operations. */
++UNIV_INLINE
++ibool
++rw_lock_lock_word_decr(
++/*===================*/
++ /* out: TRUE if decr occurs */
++ rw_lock_t* lock, /* in: rw-lock */
++ ulint amount); /* in: amount to decrement */
++/**********************************************************************
++Increments lock_word by the specified amount and returns the new value.
++UNIV_INLINE
++lint
++rw_lock_lock_word_incr(
++/*===================*/
++ /* out: lock->lock_word after increment */
++ rw_lock_t* lock, /* in: rw-lock */
++ ulint amount); /* in: amount of increment */
++/**********************************************************************
++This function sets the lock->writer_thread and lock->recursive fields.
++For platforms where we are using atomic builtins instead of lock->mutex
++it sets the lock->writer_thread field using atomics to ensure memory
++ordering. Note that it is assumed that the caller of this function
++effectively owns the lock i.e.: nobody else is allowed to modify
++lock->writer_thread at this point in time.
++The protocol is that lock->writer_thread MUST be updated BEFORE the
++lock->recursive flag is set. */
++UNIV_INLINE
++void
++rw_lock_set_writer_id_and_recursion_flag(
++/*=====================================*/
++ rw_lock_t* lock, /* in/out: lock to work on */
++ ibool recursive); /* in: TRUE if recursion
++ allowed */
+ #ifdef UNIV_SYNC_DEBUG
+ /**********************************************************************
+ Checks if the thread has locked the rw-lock in the specified mode, with
+@@ -417,47 +483,33 @@
+ field. Then no new readers are allowed in. */
+
+ struct rw_lock_struct {
++ volatile lint lock_word;
++ /* Holds the state of the lock. */
++ volatile ulint waiters;/* 1: there are waiters */
++ volatile ibool recursive;/* Default value FALSE which means the lock
++ is non-recursive. The value is typically set
++ to TRUE making normal rw_locks recursive. In
++ case of asynchronous IO, when a non-zero
++ value of 'pass' is passed then we keep the
++ lock non-recursive.
++ This flag also tells us about the state of
++ writer_thread field. If this flag is set
++ then writer_thread MUST contain the thread
++ id of the current x-holder or wait-x thread.
++ This flag must be reset in x_unlock
++ functions before incrementing the lock_word */
++ volatile os_thread_id_t writer_thread;
++ /* Thread id of writer thread. Is only
++ guaranteed to have sane and non-stale
++ value iff recursive flag is set. */
+ os_event_t event; /* Used by sync0arr.c for thread queueing */
+-
+-#ifdef __WIN__
+- os_event_t wait_ex_event; /* This windows specific event is
+- used by the thread which has set the
+- lock state to RW_LOCK_WAIT_EX. The
+- rw_lock design guarantees that this
+- thread will be the next one to proceed
+- once the current the event gets
+- signalled. See LEMMA 2 in sync0sync.c */
+-#endif
+-
+- ulint reader_count; /* Number of readers who have locked this
+- lock in the shared mode */
+- ulint writer; /* This field is set to RW_LOCK_EX if there
+- is a writer owning the lock (in exclusive
+- mode), RW_LOCK_WAIT_EX if a writer is
+- queueing for the lock, and
+- RW_LOCK_NOT_LOCKED, otherwise. */
+- os_thread_id_t writer_thread;
+- /* Thread id of a possible writer thread */
+- ulint writer_count; /* Number of times the same thread has
+- recursively locked the lock in the exclusive
+- mode */
++ os_event_t wait_ex_event;
++ /* Event for next-writer to wait on. A thread
++ must decrement lock_word before waiting. */
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_t mutex; /* The mutex protecting rw_lock_struct */
+- ulint pass; /* Default value 0. This is set to some
+- value != 0 given by the caller of an x-lock
+- operation, if the x-lock is to be passed to
+- another thread to unlock (which happens in
+- asynchronous i/o). */
+- ulint waiters; /* This ulint is set to 1 if there are
+- waiters (readers or writers) in the global
+- wait array, waiting for this rw_lock.
+- Otherwise, == 0. */
+- ibool writer_is_wait_ex;
+- /* This is TRUE if the writer field is
+- RW_LOCK_WAIT_EX; this field is located far
+- from the memory update hotspot fields which
+- are at the start of this struct, thus we can
+- peek this field without causing much memory
+- bus traffic */
++#endif /* HAVE_ATOMIC_BUILTINS */
++
+ UT_LIST_NODE_T(rw_lock_t) list;
+ /* All allocated rw locks are put into a
+ list */
+@@ -465,15 +517,23 @@
+ UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
+ /* In the debug version: pointer to the debug
+ info list of the lock */
++ ulint level; /* Level in the global latching order. */
+ #endif /* UNIV_SYNC_DEBUG */
+- ulint level; /* Level in the global latching
+- order; default SYNC_LEVEL_NONE */
++ ulint count_os_wait; /* Count of os_waits. May not be accurate */
+ const char* cfile_name;/* File name where lock created */
+- ulint cline; /* Line where created */
++ /* last s-lock file/line is not guaranteed to be correct */
+ const char* last_s_file_name;/* File name where last s-locked */
+ const char* last_x_file_name;/* File name where last x-locked */
+- ulint last_s_line; /* Line number where last time s-locked */
+- ulint last_x_line; /* Line number where last time x-locked */
++ ibool writer_is_wait_ex;
++ /* This is TRUE if the writer field is
++ RW_LOCK_WAIT_EX; this field is located far
++ from the memory update hotspot fields which
++ are at the start of this struct, thus we can
++ peek this field without causing much memory
++ bus traffic */
++ unsigned cline:14; /* Line where created */
++ unsigned last_s_line:14; /* Line number where last time s-locked */
++ unsigned last_x_line:14; /* Line number where last time x-locked */
+ ulint magic_n;
+ };
+
+diff -ruN a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic
+--- a/innobase/include/sync0rw.ic 2009-09-10 04:02:59.000000000 +0900
++++ b/innobase/include/sync0rw.ic 2009-10-22 15:18:44.000000000 +0900
+@@ -1,8 +1,31 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ The read-write lock (for threads)
+
+-(c) 1995 Innobase Oy
+-
+ Created 9/11/1995 Heikki Tuuri
+ *******************************************************/
+
+@@ -49,53 +72,88 @@
+ ulint
+ rw_lock_get_waiters(
+ /*================*/
+- rw_lock_t* lock)
++ /* out: 1 if waiters, 0 otherwise */
++ rw_lock_t* lock) /* in: rw-lock */
+ {
+ return(lock->waiters);
+ }
++
++/************************************************************************
++Sets lock->waiters to 1. It is not an error if lock->waiters is already
++1. On platforms where ATOMIC builtins are used this function enforces a
++memory barrier. */
+ UNIV_INLINE
+ void
+-rw_lock_set_waiters(
+-/*================*/
+- rw_lock_t* lock,
+- ulint flag)
++rw_lock_set_waiter_flag(
++/*====================*/
++ rw_lock_t* lock) /* in: rw-lock */
+ {
+- lock->waiters = flag;
++#ifdef HAVE_ATOMIC_BUILTINS
++ os_compare_and_swap(&lock->waiters, 0, 1);
++#else /* HAVE_ATOMIC_BUILTINS */
++ lock->waiters = 1;
++#endif /* HAVE_ATOMIC_BUILTINS */
+ }
++
++/************************************************************************
++Resets lock->waiters to 0. It is not an error if lock->waiters is already
++0. On platforms where ATOMIC builtins are used this function enforces a
++memory barrier. */
+ UNIV_INLINE
+-ulint
+-rw_lock_get_writer(
+-/*===============*/
+- rw_lock_t* lock)
++void
++rw_lock_reset_waiter_flag(
++/*======================*/
++ rw_lock_t* lock) /* in: rw-lock */
+ {
+- return(lock->writer);
++#ifdef HAVE_ATOMIC_BUILTINS
++ os_compare_and_swap(&lock->waiters, 1, 0);
++#else /* HAVE_ATOMIC_BUILTINS */
++ lock->waiters = 0;
++#endif /* HAVE_ATOMIC_BUILTINS */
+ }
++
++/**********************************************************************
++Returns the write-status of the lock - this function made more sense
++with the old rw_lock implementation. */
+ UNIV_INLINE
+-void
+-rw_lock_set_writer(
++ulint
++rw_lock_get_writer(
+ /*===============*/
+- rw_lock_t* lock,
+- ulint flag)
++ rw_lock_t* lock)
+ {
+- lock->writer = flag;
++ lint lock_word = lock->lock_word;
++ if(lock_word > 0) {
++ /* return NOT_LOCKED in s-lock state, like the writer
++ member of the old lock implementation. */
++ return(RW_LOCK_NOT_LOCKED);
++ } else if (((-lock_word) % X_LOCK_DECR) == 0) {
++ return(RW_LOCK_EX);
++ } else {
++ ut_ad(lock_word > -X_LOCK_DECR);
++ return(RW_LOCK_WAIT_EX);
++ }
+ }
++
++/**********************************************************************
++Returns number of readers. */
+ UNIV_INLINE
+ ulint
+ rw_lock_get_reader_count(
+ /*=====================*/
+ rw_lock_t* lock)
+ {
+- return(lock->reader_count);
+-}
+-UNIV_INLINE
+-void
+-rw_lock_set_reader_count(
+-/*=====================*/
+- rw_lock_t* lock,
+- ulint count)
+-{
+- lock->reader_count = count;
++ lint lock_word = lock->lock_word;
++ if(lock_word > 0) {
++ /* s-locked, no x-waiters */
++ return(X_LOCK_DECR - lock_word);
++ } else if (lock_word < 0 && lock_word > -X_LOCK_DECR) {
++ /* s-locked, with x-waiters */
++ return((ulint)(-lock_word));
++ }
++ return(0);
+ }
++
++#ifndef HAVE_ATOMIC_BUILTINS
+ UNIV_INLINE
+ mutex_t*
+ rw_lock_get_mutex(
+@@ -104,6 +162,7 @@
+ {
+ return(&(lock->mutex));
+ }
++#endif
+
+ /**********************************************************************
+ Returns the value of writer_count for the lock. Does not reserve the lock
+@@ -115,7 +174,126 @@
+ /* out: value of writer_count */
+ rw_lock_t* lock) /* in: rw-lock */
+ {
+- return(lock->writer_count);
++ lint lock_copy = lock->lock_word;
++ /* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
++ if(lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
++ return(0);
++ }
++ return(((-lock_copy) / X_LOCK_DECR) + 1);
++}
++
++/**********************************************************************
++Two different implementations for decrementing the lock_word of a rw_lock:
++one for systems supporting atomic operations, one for others. This
++does not support recursive x-locks: they should be handled by the caller and
++need not be atomic since they are performed by the current lock holder.
++Returns true if the decrement was made, false if not. */
++UNIV_INLINE
++ibool
++rw_lock_lock_word_decr(
++/*===================*/
++ /* out: TRUE if decr occurs */
++ rw_lock_t* lock, /* in: rw-lock */
++ ulint amount) /* in: amount of decrement */
++{
++
++#ifdef HAVE_ATOMIC_BUILTINS
++
++ lint local_lock_word = lock->lock_word;
++ while (local_lock_word > 0) {
++ if(os_compare_and_swap(&(lock->lock_word),
++ local_lock_word,
++ local_lock_word - amount)) {
++ return(TRUE);
++ }
++ local_lock_word = lock->lock_word;
++ }
++ return(FALSE);
++
++#else /* HAVE_ATOMIC_BUILTINS */
++
++ ibool success = FALSE;
++ mutex_enter(&(lock->mutex));
++ if(lock->lock_word > 0) {
++ lock->lock_word -= amount;
++ success = TRUE;
++ }
++ mutex_exit(&(lock->mutex));
++ return(success);
++
++#endif /* HAVE_ATOMIC_BUILTINS */
++}
++
++/**********************************************************************
++Two different implementations for incrementing the lock_word of a rw_lock:
++one for systems supporting atomic operations, one for others.
++Returns the value of lock_word after increment. */
++UNIV_INLINE
++lint
++rw_lock_lock_word_incr(
++/*===================*/
++ /* out: lock->lock_word after increment */
++ rw_lock_t* lock, /* in: rw-lock */
++ ulint amount) /* in: amount of increment */
++{
++
++#ifdef HAVE_ATOMIC_BUILTINS
++
++ return(os_atomic_increment(&(lock->lock_word), amount));
++
++#else /* HAVE_ATOMIC_BUILTINS */
++
++ lint local_lock_word;
++
++ mutex_enter(&(lock->mutex));
++
++ lock->lock_word += amount;
++ local_lock_word = lock->lock_word;
++
++ mutex_exit(&(lock->mutex));
++
++ return(local_lock_word);
++
++#endif /* HAVE_ATOMIC_BUILTINS */
++}
++
++/**********************************************************************
++This function sets the lock->writer_thread and lock->recursive fields.
++For platforms where we are using atomic builtins instead of lock->mutex
++it sets the lock->writer_thread field using atomics to ensure memory
++ordering. Note that it is assumed that the caller of this function
++effectively owns the lock i.e.: nobody else is allowed to modify
++lock->writer_thread at this point in time.
++The protocol is that lock->writer_thread MUST be updated BEFORE the
++lock->recursive flag is set. */
++UNIV_INLINE
++void
++rw_lock_set_writer_id_and_recursion_flag(
++/*=====================================*/
++ rw_lock_t* lock, /* in/out: lock to work on */
++ ibool recursive) /* in: TRUE if recursion
++ allowed */
++{
++ os_thread_id_t curr_thread = os_thread_get_curr_id();
++
++#ifdef HAVE_ATOMIC_BUILTINS
++ os_thread_id_t local_thread;
++ ibool success;
++
++ local_thread = lock->writer_thread;
++ success = os_compare_and_swap(&lock->writer_thread,
++ local_thread, curr_thread);
++ ut_a(success);
++ lock->recursive = recursive;
++
++#else /* HAVE_ATOMIC_BUILTINS */
++
++ mutex_enter(&lock->mutex);
++ lock->writer_thread = curr_thread;
++ lock->recursive = recursive;
++ mutex_exit(&lock->mutex);
++
++#endif /* HAVE_ATOMIC_BUILTINS */
+ }
+
+ /**********************************************************************
+@@ -133,26 +311,21 @@
+ const char* file_name, /* in: file name where lock requested */
+ ulint line) /* in: line where requested */
+ {
+-#ifdef UNIV_SYNC_DEBUG
+- ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+-#endif /* UNIV_SYNC_DEBUG */
+- /* Check if the writer field is free */
+-
+- if (UNIV_LIKELY(lock->writer == RW_LOCK_NOT_LOCKED)) {
+- /* Set the shared lock by incrementing the reader count */
+- lock->reader_count++;
++ /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
++ if (!rw_lock_lock_word_decr(lock, 1)) {
++ /* Locking did not succeed */
++ return(FALSE);
++ }
+
+ #ifdef UNIV_SYNC_DEBUG
+- rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name,
+- line);
++ rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
+ #endif
+- lock->last_s_file_name = file_name;
+- lock->last_s_line = line;
+-
+- return(TRUE); /* locking succeeded */
+- }
++ /* These debugging values are not set safely: they may be incorrect
++ or even refer to a line that is invalid for the file name. */
++ lock->last_s_file_name = file_name;
++ lock->last_s_line = line;
+
+- return(FALSE); /* locking did not succeed */
++ return(TRUE); /* locking succeeded */
+ }
+
+ /**********************************************************************
+@@ -167,11 +340,10 @@
+ const char* file_name, /* in: file name where requested */
+ ulint line) /* in: line where lock requested */
+ {
+- ut_ad(lock->writer == RW_LOCK_NOT_LOCKED);
+- ut_ad(rw_lock_get_reader_count(lock) == 0);
++ ut_ad(lock->lock_word == X_LOCK_DECR);
+
+- /* Set the shared lock by incrementing the reader count */
+- lock->reader_count++;
++ /* Indicate there is a new reader by decrementing lock_word */
++ lock->lock_word--;
+
+ lock->last_s_file_name = file_name;
+ lock->last_s_line = line;
+@@ -194,13 +366,11 @@
+ ulint line) /* in: line where lock requested */
+ {
+ ut_ad(rw_lock_validate(lock));
+- ut_ad(rw_lock_get_reader_count(lock) == 0);
+- ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
++ ut_ad(lock->lock_word == X_LOCK_DECR);
+
+- rw_lock_set_writer(lock, RW_LOCK_EX);
++ lock->lock_word -= X_LOCK_DECR;
+ lock->writer_thread = os_thread_get_curr_id();
+- lock->writer_count++;
+- lock->pass = 0;
++ lock->recursive = TRUE;
+
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = line;
+@@ -241,15 +411,12 @@
+ ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
+ #endif /* UNIV_SYNC_DEBUG */
+
+- mutex_enter(rw_lock_get_mutex(lock));
+-
+- if (UNIV_LIKELY(rw_lock_s_lock_low(lock, pass, file_name, line))) {
+- mutex_exit(rw_lock_get_mutex(lock));
++ /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
++ if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
+
+ return; /* Success */
+ } else {
+ /* Did not succeed, try spin wait */
+- mutex_exit(rw_lock_get_mutex(lock));
+
+ rw_lock_s_lock_spin(lock, pass, file_name, line);
+
+@@ -259,86 +426,60 @@
+
+ /**********************************************************************
+ NOTE! Use the corresponding macro, not directly this function! Lock an
+-rw-lock in shared mode for the current thread if the lock can be acquired
+-immediately. */
++rw-lock in exclusive mode for the current thread if the lock can be
++obtained immediately. */
+ UNIV_INLINE
+ ibool
+-rw_lock_s_lock_func_nowait(
++rw_lock_x_lock_func_nowait(
+ /*=======================*/
+ /* out: TRUE if success */
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ const char* file_name,/* in: file name where lock requested */
+ ulint line) /* in: line where requested */
+ {
+- ibool success = FALSE;
+-
+- mutex_enter(rw_lock_get_mutex(lock));
+-
+- if (lock->writer == RW_LOCK_NOT_LOCKED) {
+- /* Set the shared lock by incrementing the reader count */
+- lock->reader_count++;
++ os_thread_id_t curr_thread = os_thread_get_curr_id();
+
+-#ifdef UNIV_SYNC_DEBUG
+- rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name,
+- line);
+-#endif
++ ibool success;
+
+- lock->last_s_file_name = file_name;
+- lock->last_s_line = line;
++#ifdef HAVE_ATOMIC_BUILTINS
++ success = os_compare_and_swap(&(lock->lock_word), X_LOCK_DECR, 0);
++#else
+
++ success = FALSE;
++ mutex_enter(&(lock->mutex));
++ if (lock->lock_word == X_LOCK_DECR) {
++ lock->lock_word = 0;
+ success = TRUE;
+ }
++ mutex_exit(&(lock->mutex));
+
+- mutex_exit(rw_lock_get_mutex(lock));
+-
+- return(success);
+-}
++#endif
++ if (success) {
++ rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
+
+-/**********************************************************************
+-NOTE! Use the corresponding macro, not directly this function! Lock an
+-rw-lock in exclusive mode for the current thread if the lock can be
+-obtained immediately. */
+-UNIV_INLINE
+-ibool
+-rw_lock_x_lock_func_nowait(
+-/*=======================*/
+- /* out: TRUE if success */
+- rw_lock_t* lock, /* in: pointer to rw-lock */
+- const char* file_name,/* in: file name where lock requested */
+- ulint line) /* in: line where requested */
+-{
+- ibool success = FALSE;
+- os_thread_id_t curr_thread = os_thread_get_curr_id();
+- mutex_enter(rw_lock_get_mutex(lock));
++ } else if (lock->recursive
++ && os_thread_eq(lock->writer_thread, curr_thread)) {
++ /* Relock: this lock_word modification is safe since no other
++ threads can modify (lock, unlock, or reserve) lock_word while
++ there is an exclusive writer and this is the writer thread. */
++ lock->lock_word -= X_LOCK_DECR;
+
+- if (UNIV_UNLIKELY(rw_lock_get_reader_count(lock) != 0)) {
+- } else if (UNIV_LIKELY(rw_lock_get_writer(lock)
+- == RW_LOCK_NOT_LOCKED)) {
+- rw_lock_set_writer(lock, RW_LOCK_EX);
+- lock->writer_thread = curr_thread;
+- lock->pass = 0;
+- relock:
+- lock->writer_count++;
++ ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0);
+
++ } else {
++ /* Failure */
++ return(FALSE);
++ }
+ #ifdef UNIV_SYNC_DEBUG
+- rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
++ rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
+ #endif
+
+- lock->last_x_file_name = file_name;
+- lock->last_x_line = line;
+-
+- success = TRUE;
+- } else if (rw_lock_get_writer(lock) == RW_LOCK_EX
+- && lock->pass == 0
+- && os_thread_eq(lock->writer_thread, curr_thread)) {
+- goto relock;
+- }
+-
+- mutex_exit(rw_lock_get_mutex(lock));
++ lock->last_x_file_name = file_name;
++ lock->last_x_line = line;
+
+ ut_ad(rw_lock_validate(lock));
+
+- return(success);
++ return(TRUE);
+ }
+
+ /**********************************************************************
+@@ -354,39 +495,21 @@
+ #endif
+ )
+ {
+- mutex_t* mutex = &(lock->mutex);
+- ibool sg = FALSE;
+-
+- /* Acquire the mutex protecting the rw-lock fields */
+- mutex_enter(mutex);
+-
+- /* Reset the shared lock by decrementing the reader count */
+-
+- ut_a(lock->reader_count > 0);
+- lock->reader_count--;
++ ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
+ #endif
+
+- /* If there may be waiters and this was the last s-lock,
+- signal the object */
++ /* Increment lock_word to indicate 1 less reader */
++ if (rw_lock_lock_word_incr(lock, 1) == 0) {
+
+- if (UNIV_UNLIKELY(lock->waiters)
+- && lock->reader_count == 0) {
+- sg = TRUE;
+-
+- rw_lock_set_waiters(lock, 0);
+- }
+-
+- mutex_exit(mutex);
+-
+- if (UNIV_UNLIKELY(sg)) {
+-#ifdef __WIN__
++ /* wait_ex waiter exists. It may not be asleep, but we signal
++ anyway. We do not wake other waiters, because they can't
++ exist without wait_ex waiter and wait_ex waiter goes first.*/
+ os_event_set(lock->wait_ex_event);
+-#endif
+- os_event_set(lock->event);
+ sync_array_object_signalled(sync_primary_wait_array);
++
+ }
+
+ ut_ad(rw_lock_validate(lock));
+@@ -405,16 +528,15 @@
+ /*====================*/
+ rw_lock_t* lock) /* in: rw-lock */
+ {
+- /* Reset the shared lock by decrementing the reader count */
+-
+- ut_ad(lock->reader_count > 0);
+-
+- lock->reader_count--;
++ ut_ad(lock->lock_word < X_LOCK_DECR);
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
+ #endif
+
++ /* Decrease reader count by incrementing lock_word */
++ lock->lock_word++;
++
+ ut_ad(!lock->waiters);
+ ut_ad(rw_lock_validate(lock));
+ #ifdef UNIV_SYNC_PERF_STAT
+@@ -435,42 +557,32 @@
+ #endif
+ )
+ {
+- ibool sg = FALSE;
+-
+- /* Acquire the mutex protecting the rw-lock fields */
+- mutex_enter(&(lock->mutex));
+-
+- /* Reset the exclusive lock if this thread no longer has an x-mode
+- lock */
+-
+- ut_ad(lock->writer_count > 0);
++ ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
+
+- lock->writer_count--;
+-
+- if (lock->writer_count == 0) {
+- rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
++ /* lock->recursive flag also indicates if lock->writer_thread is
++ valid or stale. If we are the last of the recursive callers
++ then we must unset lock->recursive flag to indicate that the
++ lock->writer_thread is now stale.
++ Note that since we still hold the x-lock we can safely read the
++ lock_word. */
++ if (lock->lock_word == 0) {
++ /* Last caller in a possible recursive chain. */
++ lock->recursive = FALSE;
+ }
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
+ #endif
+
+- /* If there may be waiters, signal the lock */
+- if (UNIV_UNLIKELY(lock->waiters)
+- && lock->writer_count == 0) {
+-
+- sg = TRUE;
+- rw_lock_set_waiters(lock, 0);
+- }
+-
+- mutex_exit(&(lock->mutex));
+-
+- if (UNIV_UNLIKELY(sg)) {
+-#ifdef __WIN__
+- os_event_set(lock->wait_ex_event);
+-#endif
+- os_event_set(lock->event);
+- sync_array_object_signalled(sync_primary_wait_array);
++ if (rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) {
++ /* Lock is now free. May have to signal read/write waiters.
++ We do not need to signal wait_ex waiters, since they cannot
++ exist when there is a writer. */
++ if (lock->waiters) {
++ rw_lock_reset_waiter_flag(lock);
++ os_event_set(lock->event);
++ sync_array_object_signalled(sync_primary_wait_array);
++ }
+ }
+
+ ut_ad(rw_lock_validate(lock));
+@@ -492,18 +604,18 @@
+ /* Reset the exclusive lock if this thread no longer has an x-mode
+ lock */
+
+- ut_ad(lock->writer_count > 0);
+-
+- lock->writer_count--;
+-
+- if (lock->writer_count == 0) {
+- rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+- }
++ ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
+ #endif
+
++ if (lock->lock_word == 0) {
++ lock->recursive = FALSE;
++ }
++
++ lock->lock_word += X_LOCK_DECR;
++
+ ut_ad(!lock->waiters);
+ ut_ad(rw_lock_validate(lock));
+
+diff -ruN a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
+--- a/innobase/include/sync0sync.h 2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/include/sync0sync.h 2009-10-22 15:18:44.000000000 +0900
+@@ -1,8 +1,31 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ Mutex, the basic synchronization primitive
+
+-(c) 1995 Innobase Oy
+-
+ Created 9/5/1995 Heikki Tuuri
+ *******************************************************/
+
+@@ -465,8 +488,11 @@
+ struct mutex_struct {
+ os_event_t event; /* Used by sync0arr.c for the wait queue */
+ ulint lock_word; /* This ulint is the target of the atomic
+- test-and-set instruction in Win32 */
+-#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
++ test-and-set instruction in Win32 and
++ x86 32/64 with GCC 4.1.0 or later version */
++#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
++#elif defined(HAVE_ATOMIC_BUILTINS)
++#else
+ os_fast_mutex_t
+ os_fast_mutex; /* In other systems we use this OS mutex
+ in place of lock_word */
+@@ -525,8 +551,7 @@
+ /* The number of system calls made in this module. Intended for performance
+ monitoring. */
+
+-extern ulint mutex_system_call_count;
+-extern ulint mutex_exit_count;
++extern ib_longlong mutex_exit_count;
+
+ /* Latching order checks start when this is set TRUE */
+ extern ibool sync_order_checks_on;
+diff -ruN a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic
+--- a/innobase/include/sync0sync.ic 2009-09-10 04:02:59.000000000 +0900
++++ b/innobase/include/sync0sync.ic 2009-10-22 15:18:44.000000000 +0900
+@@ -1,21 +1,34 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ Mutex, the basic synchronization primitive
+
+-(c) 1995 Innobase Oy
+-
+ Created 9/5/1995 Heikki Tuuri
+ *******************************************************/
+
+-#if defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
+-/* %z0: Use the size of operand %0 which in our case is *m to determine
+-instruction size, it should end up as xchgl. "1" in the input constraint,
+-says that "in" has to go in the same place as "out".*/
+-#define TAS(m, in, out) \
+- asm volatile ("xchg%z0 %2, %0" \
+- : "=g" (*(m)), "=r" (out) \
+- : "1" (in)) /* Note: "1" here refers to "=r" (out) */
+-#endif
+-
+ /**********************************************************************
+ Sets the waiters field in a mutex. */
+
+@@ -94,12 +107,8 @@
+ /* mutex_fence(); */
+
+ return(res);
+-#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
+- ulint res;
+-
+- TAS(&mutex->lock_word, 1, res);
+-
+- return(res);
++#elif defined(HAVE_ATOMIC_BUILTINS)
++ return __sync_lock_test_and_set(&(mutex->lock_word), 1);
+ #else
+ ibool ret;
+
+@@ -136,10 +145,11 @@
+ __asm MOV EDX, 0
+ __asm MOV ECX, lw
+ __asm XCHG EDX, DWORD PTR [ECX]
+-#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
+- ulint res;
+-
+- TAS(&mutex->lock_word, 0, res);
++#elif defined(HAVE_ATOMIC_BUILTINS)
++ /* In theory __sync_lock_release should be used to release the lock.
++ Unfortunately, it does not work properly alone. The workaround is
++ that more conservative __sync_lock_test_and_set is used instead. */
++ __sync_lock_test_and_set(&(mutex->lock_word), 0);
+ #else
+ mutex->lock_word = 0;
+
+diff -ruN a/innobase/row/row0sel.c b/innobase/row/row0sel.c
+--- a/innobase/row/row0sel.c 2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/row/row0sel.c 2009-10-22 15:18:44.000000000 +0900
+@@ -1178,7 +1178,7 @@
+ rw_lock_s_lock(&btr_search_latch);
+
+ search_latch_locked = TRUE;
+- } else if (btr_search_latch.writer_is_wait_ex) {
++ } else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
+
+ /* There is an x-latch request waiting: release the
+ s-latch for a moment; as an s-latch here is often
+@@ -3123,7 +3123,7 @@
+ /* PHASE 0: Release a possible s-latch we are holding on the
+ adaptive hash index latch if there is someone waiting behind */
+
+- if (UNIV_UNLIKELY(btr_search_latch.writer != RW_LOCK_NOT_LOCKED)
++ if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
+ && trx->has_search_latch) {
+
+ /* There is an x-latch request on the adaptive hash index:
+diff -ruN a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c
+--- a/innobase/sync/sync0arr.c 2009-09-10 04:03:01.000000000 +0900
++++ b/innobase/sync/sync0arr.c 2009-10-22 15:18:44.000000000 +0900
+@@ -1,8 +1,31 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ The wait array used in synchronization primitives
+
+-(c) 1995 Innobase Oy
+-
+ Created 9/5/1995 Heikki Tuuri
+ *******************************************************/
+
+@@ -297,25 +320,21 @@
+ }
+
+ /***********************************************************************
+-Puts the cell event in reset state. */
++Returns the event that the thread owning the cell waits for. */
+ static
+-ib_longlong
+-sync_cell_event_reset(
+-/*==================*/
+- /* out: value of signal_count
+- at the time of reset. */
+- ulint type, /* in: lock type mutex/rw_lock */
+- void* object) /* in: the rw_lock/mutex object */
++os_event_t
++sync_cell_get_event(
++/*================*/
++ sync_cell_t* cell) /* in: non-empty sync array cell */
+ {
++ ulint type = cell->request_type;
++
+ if (type == SYNC_MUTEX) {
+- return(os_event_reset(((mutex_t *) object)->event));
+-#ifdef __WIN__
++ return(((mutex_t *) cell->wait_object)->event);
+ } else if (type == RW_LOCK_WAIT_EX) {
+- return(os_event_reset(
+- ((rw_lock_t *) object)->wait_ex_event));
+-#endif
+- } else {
+- return(os_event_reset(((rw_lock_t *) object)->event));
++ return(((rw_lock_t *) cell->wait_object)->wait_ex_event);
++ } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
++ return(((rw_lock_t *) cell->wait_object)->event);
+ }
+ }
+
+@@ -334,6 +353,7 @@
+ ulint* index) /* out: index of the reserved cell */
+ {
+ sync_cell_t* cell;
++ os_event_t event;
+ ulint i;
+
+ ut_a(object);
+@@ -372,8 +392,8 @@
+ /* Make sure the event is reset and also store
+ the value of signal_count at which the event
+ was reset. */
+- cell->signal_count = sync_cell_event_reset(type,
+- object);
++ event = sync_cell_get_event(cell);
++ cell->signal_count = os_event_reset(event);
+
+ cell->reservation_time = time(NULL);
+
+@@ -413,19 +433,7 @@
+ ut_a(!cell->waiting);
+ ut_ad(os_thread_get_curr_id() == cell->thread);
+
+- if (cell->request_type == SYNC_MUTEX) {
+- event = ((mutex_t*) cell->wait_object)->event;
+-#ifdef __WIN__
+- /* On windows if the thread about to wait is the one which
+- has set the state of the rw_lock to RW_LOCK_WAIT_EX, then
+- it waits on a special event i.e.: wait_ex_event. */
+- } else if (cell->request_type == RW_LOCK_WAIT_EX) {
+- event = ((rw_lock_t*) cell->wait_object)->wait_ex_event;
+-#endif
+- } else {
+- event = ((rw_lock_t*) cell->wait_object)->event;
+- }
+-
++ event = sync_cell_get_event(cell);
+ cell->waiting = TRUE;
+
+ #ifdef UNIV_SYNC_DEBUG
+@@ -464,6 +472,7 @@
+ mutex_t* mutex;
+ rw_lock_t* rwlock;
+ ulint type;
++ ulint writer;
+
+ type = cell->request_type;
+
+@@ -492,9 +501,7 @@
+ (ulong) mutex->waiters);
+
+ } else if (type == RW_LOCK_EX
+-#ifdef __WIN__
+ || type == RW_LOCK_WAIT_EX
+-#endif
+ || type == RW_LOCK_SHARED) {
+
+ fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
+@@ -505,21 +512,24 @@
+ " RW-latch at %p created in file %s line %lu\n",
+ rwlock, rwlock->cfile_name,
+ (ulong) rwlock->cline);
+- if (rwlock->writer != RW_LOCK_NOT_LOCKED) {
++ writer = rw_lock_get_writer(rwlock);
++ if (writer != RW_LOCK_NOT_LOCKED) {
+ fprintf(file,
+ "a writer (thread id %lu) has reserved it in mode %s",
+ (ulong) os_thread_pf(rwlock->writer_thread),
+- rwlock->writer == RW_LOCK_EX
++ writer == RW_LOCK_EX
+ ? " exclusive\n"
+ : " wait exclusive\n");
+ }
+
+ fprintf(file,
+- "number of readers %lu, waiters flag %lu\n"
++ "number of readers %lu, waiters flag %lu, "
++ "lock_word: %lx\n"
+ "Last time read locked in file %s line %lu\n"
+ "Last time write locked in file %s line %lu\n",
+- (ulong) rwlock->reader_count,
++ (ulong) rw_lock_get_reader_count(rwlock),
+ (ulong) rwlock->waiters,
++ rwlock->lock_word,
+ rwlock->last_s_file_name,
+ (ulong) rwlock->last_s_line,
+ rwlock->last_x_file_name,
+@@ -773,28 +783,30 @@
+ return(TRUE);
+ }
+
+- } else if (cell->request_type == RW_LOCK_EX
+- || cell->request_type == RW_LOCK_WAIT_EX) {
++ } else if (cell->request_type == RW_LOCK_EX) {
+
+ lock = cell->wait_object;
+
+- if (rw_lock_get_reader_count(lock) == 0
+- && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
++ if (lock->lock_word > 0) {
++ /* Either unlocked or only read locked. */
+
+ return(TRUE);
+ }
+
+- if (rw_lock_get_reader_count(lock) == 0
+- && rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX
+- && os_thread_eq(lock->writer_thread, cell->thread)) {
++ } else if (cell->request_type == RW_LOCK_WAIT_EX) {
++
++ lock = cell->wait_object;
++
++ /* lock_word == 0 means all readers have left */
++ if (lock->lock_word == 0) {
+
+ return(TRUE);
+ }
+-
+ } else if (cell->request_type == RW_LOCK_SHARED) {
+ lock = cell->wait_object;
+
+- if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
++ /* lock_word > 0 means neither a writer nor a reserved writer */
++ if (lock->lock_word > 0) {
+
+ return(TRUE);
+ }
+@@ -839,11 +851,15 @@
+ /*========================*/
+ sync_array_t* arr) /* in: wait array */
+ {
++#ifdef HAVE_ATOMIC_BUILTINS
++ (void) os_atomic_increment(&arr->sg_count, 1);
++#else
+ sync_array_enter(arr);
+
+ arr->sg_count++;
+
+ sync_array_exit(arr);
++#endif
+ }
+
+ /**************************************************************************
+@@ -859,6 +875,7 @@
+ sync_cell_t* cell;
+ ulint count;
+ ulint i;
++ os_event_t event;
+
+ sync_array_enter(arr);
+
+@@ -868,36 +885,20 @@
+ while (count < arr->n_reserved) {
+
+ cell = sync_array_get_nth_cell(arr, i);
++ i++;
+
+- if (cell->wait_object != NULL) {
+-
++ if (cell->wait_object == NULL) {
++ continue;
++ }
+ count++;
+
+ if (sync_arr_cell_can_wake_up(cell)) {
+
+- if (cell->request_type == SYNC_MUTEX) {
+- mutex_t* mutex;
++ event = sync_cell_get_event(cell);
+
+- mutex = cell->wait_object;
+- os_event_set(mutex->event);
+-#ifdef __WIN__
+- } else if (cell->request_type
+- == RW_LOCK_WAIT_EX) {
+- rw_lock_t* lock;
+-
+- lock = cell->wait_object;
+- os_event_set(lock->wait_ex_event);
+-#endif
+- } else {
+- rw_lock_t* lock;
+-
+- lock = cell->wait_object;
+- os_event_set(lock->event);
+- }
+- }
++ os_event_set(event);
+ }
+
+- i++;
+ }
+
+ sync_array_exit(arr);
+@@ -1014,4 +1015,3 @@
+
+ sync_array_exit(arr);
+ }
+-
+diff -ruN a/innobase/sync/sync0rw.c b/innobase/sync/sync0rw.c
+--- a/innobase/sync/sync0rw.c 2009-09-10 04:03:01.000000000 +0900
++++ b/innobase/sync/sync0rw.c 2009-10-22 15:18:44.000000000 +0900
+@@ -1,8 +1,31 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ The read-write lock (for thread synchronization)
+
+-(c) 1995 Innobase Oy
+-
+ Created 9/11/1995 Heikki Tuuri
+ *******************************************************/
+
+@@ -15,17 +38,110 @@
+ #include "mem0mem.h"
+ #include "srv0srv.h"
+
+-ulint rw_s_system_call_count = 0;
+-ulint rw_s_spin_wait_count = 0;
+-ulint rw_s_os_wait_count = 0;
++/*
++ IMPLEMENTATION OF THE RW_LOCK
++ =============================
++The status of a rw_lock is held in lock_word. The initial value of lock_word is
++X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
++for each x-lock. This describes the lock state for each value of lock_word:
++
++lock_word == X_LOCK_DECR: Unlocked.
++0 < lock_word < X_LOCK_DECR: Read locked, no waiting writers.
++ (X_LOCK_DECR - lock_word) is the
++ number of readers that hold the lock.
++lock_word == 0: Write locked
++-X_LOCK_DECR < lock_word < 0: Read locked, with a waiting writer.
++ (-lock_word) is the number of readers
++ that hold the lock.
++lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been
++ decremented by X_LOCK_DECR once for each lock,
++ so the number of locks is:
++ ((-lock_word) / X_LOCK_DECR) + 1
++When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0:
++other values of lock_word are invalid.
++
++The lock_word is always read and updated atomically and consistently, so that
++it always represents the state of the lock, and the state of the lock changes
++with a single atomic operation. This lock_word holds all of the information
++that a thread needs in order to determine if it is eligible to gain the lock
++or if it must spin or sleep. The one exception to this is that writer_thread
++must be verified before recursive write locks: to solve this scenario, we make
++writer_thread readable by all threads, but only writeable by the x-lock holder.
++
++The other members of the lock obey the following rules to remain consistent:
++
++recursive: This and the writer_thread field together control the
++ behaviour of recursive x-locking.
++ lock->recursive must be FALSE in following states:
++ 1) The writer_thread contains garbage i.e.: the
++ lock has just been initialized.
++ 2) The lock is not x-held and there is no
++ x-waiter waiting on WAIT_EX event.
++ 3) The lock is x-held or there is an x-waiter
++ waiting on WAIT_EX event but the 'pass' value
++ is non-zero.
++ lock->recursive is TRUE iff:
++ 1) The lock is x-held or there is an x-waiter
++ waiting on WAIT_EX event and the 'pass' value
++ is zero.
++ This flag must be set after the writer_thread field
++ has been updated with a memory ordering barrier.
++ It is unset before the lock_word has been incremented.
++writer_thread: Is used only in recursive x-locking. Can only be safely
++ read iff lock->recursive flag is TRUE.
++ This field is uninitialized at lock creation time and
++ is updated atomically when x-lock is acquired or when
++ move_ownership is called. A thread is only allowed to
++ set the value of this field to its thread_id i.e.: a
++ thread cannot set writer_thread to some other thread's
++ id.
++waiters: May be set to 1 anytime, but to avoid unnecessary wake-up
++ signals, it should only be set to 1 when there are threads
++ waiting on event. Must be 1 when a writer starts waiting to
++ ensure the current x-locking thread sends a wake-up signal
++ during unlock. May only be reset to 0 immediately before a
++ wake-up signal is sent to event. On most platforms, a
++ memory barrier is required after waiters is set, and before
++ verifying lock_word is still held, to ensure some unlocker
++ really does see the flag's new value.
++event: Threads wait on event for read or write lock when another
++ thread has an x-lock or an x-lock reservation (wait_ex). A
++ thread may only wait on event after performing the following
++ actions in order:
++ (1) Record the counter value of event (with os_event_reset).
++ (2) Set waiters to 1.
++ (3) Verify lock_word <= 0.
++ (1) must come before (2) to ensure signal is not missed.
++ (2) must come before (3) to ensure a signal is sent.
++ These restrictions force the above ordering.
++ Immediately before sending the wake-up signal, we should:
++ (1) Verify lock_word == X_LOCK_DECR (unlocked)
++ (2) Reset waiters to 0.
++wait_ex_event: A thread may only wait on the wait_ex_event after it has
++ performed the following actions in order:
++ (1) Decrement lock_word by X_LOCK_DECR.
++ (2) Record counter value of wait_ex_event (os_event_reset,
++ called from sync_array_reserve_cell).
++ (3) Verify that lock_word < 0.
++ (1) must come first to ensure no other threads become reader
++ or next writer, and notifies unlocker that signal must be sent.
++ (2) must come before (3) to ensure the signal is not missed.
++ These restrictions force the above ordering.
++ Immediately before sending the wake-up signal, we should:
++ Verify lock_word == 0 (waiting thread holds x_lock)
++*/
++
++ib_longlong rw_s_spin_wait_count = 0;
++ib_longlong rw_s_spin_round_count = 0;
++ib_longlong rw_s_os_wait_count = 0;
++
++ib_longlong rw_s_exit_count = 0;
++
++ib_longlong rw_x_spin_wait_count = 0;
++ib_longlong rw_x_spin_round_count = 0;
++ib_longlong rw_x_os_wait_count = 0;
+
+-ulint rw_s_exit_count = 0;
+-
+-ulint rw_x_system_call_count = 0;
+-ulint rw_x_spin_wait_count = 0;
+-ulint rw_x_os_wait_count = 0;
+-
+-ulint rw_x_exit_count = 0;
++ib_longlong rw_x_exit_count = 0;
+
+ /* The global list of rw-locks */
+ rw_lock_list_t rw_lock_list;
+@@ -99,22 +215,30 @@
+ object is created, then the following call initializes
+ the sync system. */
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_create(rw_lock_get_mutex(lock));
+ mutex_set_level(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
+
+ lock->mutex.cfile_name = cfile_name;
+ lock->mutex.cline = cline;
+-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
++# if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+ lock->mutex.cmutex_name = cmutex_name;
+ lock->mutex.mutex_type = 1;
+-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
++# endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+
+- rw_lock_set_waiters(lock, 0);
+- rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+- lock->writer_count = 0;
+- rw_lock_set_reader_count(lock, 0);
+-
+- lock->writer_is_wait_ex = FALSE;
++#else /* HAVE_ATOMIC_BUILTINS */
++# ifdef UNIV_DEBUG
++ UT_NOT_USED(cmutex_name);
++# endif
++#endif /* HAVE_ATOMIC_BUILTINS */
++
++ lock->lock_word = X_LOCK_DECR;
++ lock->waiters = 0;
++
++ /* We set this value to signify that lock->writer_thread
++ contains garbage at initialization and cannot be used for
++ recursive x-locking. */
++ lock->recursive = FALSE;
+
+ #ifdef UNIV_SYNC_DEBUG
+ UT_LIST_INIT(lock->debug_list);
+@@ -126,15 +250,13 @@
+ lock->cfile_name = cfile_name;
+ lock->cline = cline;
+
++ lock->count_os_wait = 0;
+ lock->last_s_file_name = "not yet reserved";
+ lock->last_x_file_name = "not yet reserved";
+ lock->last_s_line = 0;
+ lock->last_x_line = 0;
+ lock->event = os_event_create(NULL);
+-
+-#ifdef __WIN__
+ lock->wait_ex_event = os_event_create(NULL);
+-#endif
+
+ mutex_enter(&rw_lock_list_mutex);
+
+@@ -158,23 +280,17 @@
+ /*=========*/
+ rw_lock_t* lock) /* in: rw-lock */
+ {
+-#ifdef UNIV_DEBUG
+ ut_a(rw_lock_validate(lock));
+-#endif /* UNIV_DEBUG */
+- ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
+- ut_a(rw_lock_get_waiters(lock) == 0);
+- ut_a(rw_lock_get_reader_count(lock) == 0);
++ ut_a(lock->lock_word == X_LOCK_DECR);
+
+- lock->magic_n = 0;
+-
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_free(rw_lock_get_mutex(lock));
++#endif /* HAVE_ATOMIC_BUILTINS */
+
+ mutex_enter(&rw_lock_list_mutex);
+ os_event_free(lock->event);
+
+-#ifdef __WIN__
+ os_event_free(lock->wait_ex_event);
+-#endif
+
+ if (UT_LIST_GET_PREV(list, lock)) {
+ ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
+@@ -186,6 +302,8 @@
+ UT_LIST_REMOVE(list, rw_lock_list, lock);
+
+ mutex_exit(&rw_lock_list_mutex);
++
++ lock->magic_n = 0;
+ }
+
+ /**********************************************************************
+@@ -199,19 +317,12 @@
+ {
+ ut_a(lock);
+
+- mutex_enter(rw_lock_get_mutex(lock));
++ ulint waiters = rw_lock_get_waiters(lock);
++ lint lock_word = lock->lock_word;
+
+ ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
+- ut_a((rw_lock_get_reader_count(lock) == 0)
+- || (rw_lock_get_writer(lock) != RW_LOCK_EX));
+- ut_a((rw_lock_get_writer(lock) == RW_LOCK_EX)
+- || (rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
+- || (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED));
+- ut_a((rw_lock_get_waiters(lock) == 0)
+- || (rw_lock_get_waiters(lock) == 1));
+- ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0));
+-
+- mutex_exit(rw_lock_get_mutex(lock));
++ ut_a(waiters == 0 || waiters == 1);
++ ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0);
+
+ return(TRUE);
+ }
+@@ -232,18 +343,15 @@
+ ulint line) /* in: line where requested */
+ {
+ ulint index; /* index of the reserved wait cell */
+- ulint i; /* spin round count */
++ ulint i = 0; /* spin round count */
+
+ ut_ad(rw_lock_validate(lock));
+
++ rw_s_spin_wait_count++; /* Count calls to this function */
+ lock_loop:
+- rw_s_spin_wait_count++;
+
+ /* Spin waiting for the writer field to become free */
+- i = 0;
+-
+- while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
+- && i < SYNC_SPIN_ROUNDS) {
++ while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) {
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+ }
+@@ -262,28 +370,32 @@
+ lock->cfile_name, (ulong) lock->cline, (ulong) i);
+ }
+
+- mutex_enter(rw_lock_get_mutex(lock));
+-
+ /* We try once again to obtain the lock */
+-
+ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
+- mutex_exit(rw_lock_get_mutex(lock));
++ rw_s_spin_round_count += i;
+
+ return; /* Success */
+ } else {
+- /* If we get here, locking did not succeed, we may
+- suspend the thread to wait in the wait array */
+
+- rw_s_system_call_count++;
++ if (i < SYNC_SPIN_ROUNDS) {
++ goto lock_loop;
++ }
++
++ rw_s_spin_round_count += i;
+
+ sync_array_reserve_cell(sync_primary_wait_array,
+ lock, RW_LOCK_SHARED,
+ file_name, line,
+ &index);
+
+- rw_lock_set_waiters(lock, 1);
+-
+- mutex_exit(rw_lock_get_mutex(lock));
++ /* Set waiters before checking lock_word to ensure wake-up
++ signal is sent. This may lead to some unnecessary signals. */
++ rw_lock_set_waiter_flag(lock);
++
++ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
++ sync_array_free_cell(sync_primary_wait_array, index);
++ return; /* Success */
++ }
+
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+@@ -292,11 +404,13 @@
+ lock, lock->cfile_name, (ulong) lock->cline);
+ }
+
+- rw_s_system_call_count++;
++ /* these stats may not be accurate */
++ lock->count_os_wait++;
+ rw_s_os_wait_count++;
+
+ sync_array_wait_event(sync_primary_wait_array, index);
+
++ i = 0;
+ goto lock_loop;
+ }
+ }
+@@ -318,114 +432,130 @@
+ {
+ ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
+
+- mutex_enter(&(lock->mutex));
+-
+- lock->writer_thread = os_thread_get_curr_id();
+-
+- lock->pass = 0;
+-
+- mutex_exit(&(lock->mutex));
++ rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
+ }
+
+ /**********************************************************************
+-Low-level function for acquiring an exclusive lock. */
++Function for the next writer to call. Waits for readers to exit.
++The caller must have already decremented lock_word by X_LOCK_DECR.*/
+ UNIV_INLINE
+-ulint
+-rw_lock_x_lock_low(
+-/*===============*/
+- /* out: RW_LOCK_NOT_LOCKED if did
+- not succeed, RW_LOCK_EX if success,
+- RW_LOCK_WAIT_EX, if got wait reservation */
++void
++rw_lock_x_lock_wait(
++/*================*/
+ rw_lock_t* lock, /* in: pointer to rw-lock */
++#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
++#endif
+ const char* file_name,/* in: file name where lock requested */
+ ulint line) /* in: line where requested */
+ {
+-#ifdef UNIV_SYNC_DEBUG
+- ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+-#endif /* UNIV_SYNC_DEBUG */
+- if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
++ ulint index;
++ ulint i = 0;
+
+- if (rw_lock_get_reader_count(lock) == 0) {
++ ut_ad(lock->lock_word <= 0);
++
++ while (lock->lock_word < 0) {
++ if (srv_spin_wait_delay) {
++ ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
++ }
++ if(i < SYNC_SPIN_ROUNDS) {
++ i++;
++ continue;
++ }
+
+- rw_lock_set_writer(lock, RW_LOCK_EX);
+- lock->writer_thread = os_thread_get_curr_id();
+- lock->writer_count++;
+- lock->pass = pass;
++ /* If there is still a reader, then go to sleep.*/
++ rw_x_spin_round_count += i;
++ i = 0;
++ sync_array_reserve_cell(sync_primary_wait_array,
++ lock,
++ RW_LOCK_WAIT_EX,
++ file_name, line,
++ &index);
++ /* Check lock_word to ensure wake-up isn't missed.*/
++ if(lock->lock_word < 0) {
+
++ /* these stats may not be accurate */
++ lock->count_os_wait++;
++ rw_x_os_wait_count++;
++
++ /* Add debug info as it is needed to detect possible
++ deadlock. We must add info for WAIT_EX thread for
++ deadlock detection to work properly. */
+ #ifdef UNIV_SYNC_DEBUG
+- rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
++ rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
+ file_name, line);
+ #endif
+- lock->last_x_file_name = file_name;
+- lock->last_x_line = line;
+-
+- /* Locking succeeded, we may return */
+- return(RW_LOCK_EX);
+- } else {
+- /* There are readers, we have to wait */
+- rw_lock_set_writer(lock, RW_LOCK_WAIT_EX);
+- lock->writer_thread = os_thread_get_curr_id();
+- lock->pass = pass;
+- lock->writer_is_wait_ex = TRUE;
+
++ sync_array_wait_event(sync_primary_wait_array,
++ index);
+ #ifdef UNIV_SYNC_DEBUG
+- rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
+- file_name, line);
++ rw_lock_remove_debug_info(lock, pass,
++ RW_LOCK_WAIT_EX);
+ #endif
+-
+- return(RW_LOCK_WAIT_EX);
++ /* A wake-up does not guarantee lock_word == 0;
++ the while-loop condition is rechecked before proceeding. */
++ } else {
++ sync_array_free_cell(sync_primary_wait_array,
++ index);
+ }
++ }
++ rw_x_spin_round_count += i;
++}
+
+- } else if ((rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
+- && os_thread_eq(lock->writer_thread,
+- os_thread_get_curr_id())) {
++/**********************************************************************
++Low-level function for acquiring an exclusive lock. */
++UNIV_INLINE
++ibool
++rw_lock_x_lock_low(
++/*===============*/
++ /* out: FALSE if did not succeed,
++ TRUE if success. */
++ rw_lock_t* lock, /* in: pointer to rw-lock */
++ ulint pass, /* in: pass value; != 0, if the lock will
++ be passed to another thread to unlock */
++ const char* file_name,/* in: file name where lock requested */
++ ulint line) /* in: line where requested */
++{
++ os_thread_id_t curr_thread = os_thread_get_curr_id();
+
+- if (rw_lock_get_reader_count(lock) == 0) {
++ if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
+
+- rw_lock_set_writer(lock, RW_LOCK_EX);
+- lock->writer_count++;
+- lock->pass = pass;
+- lock->writer_is_wait_ex = FALSE;
++ /* lock->recursive also tells us if the writer_thread
++ field is stale or active. As we are going to write
++ our own thread id in that field it must be that the
++ current writer_thread value is not active. */
++ ut_a(!lock->recursive);
+
++ /* Decrement occurred: we are writer or next-writer. */
++ rw_lock_set_writer_id_and_recursion_flag(lock,
++ pass ? FALSE : TRUE);
++
++ rw_lock_x_lock_wait(lock,
+ #ifdef UNIV_SYNC_DEBUG
+- rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX);
+- rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
+- file_name, line);
++ pass,
+ #endif
++ file_name, line);
+
+- lock->last_x_file_name = file_name;
+- lock->last_x_line = line;
+-
+- /* Locking succeeded, we may return */
+- return(RW_LOCK_EX);
++ } else {
++ /* Decrement failed: relock or failed lock */
++ if (!pass && lock->recursive
++ && os_thread_eq(lock->writer_thread, curr_thread)) {
++ /* Relock */
++ lock->lock_word -= X_LOCK_DECR;
++ } else {
++ /* Another thread locked before us */
++ return(FALSE);
+ }
+-
+- return(RW_LOCK_WAIT_EX);
+-
+- } else if ((rw_lock_get_writer(lock) == RW_LOCK_EX)
+- && os_thread_eq(lock->writer_thread,
+- os_thread_get_curr_id())
+- && (lock->pass == 0)
+- && (pass == 0)) {
+-
+- lock->writer_count++;
+-
++ }
+ #ifdef UNIV_SYNC_DEBUG
+- rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name,
+- line);
++ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
++ file_name, line);
+ #endif
++ lock->last_x_file_name = file_name;
++ lock->last_x_line = (unsigned int) line;
+
+- lock->last_x_file_name = file_name;
+- lock->last_x_line = line;
+-
+- /* Locking succeeded, we may return */
+- return(RW_LOCK_EX);
+- }
+-
+- /* Locking did not succeed */
+- return(RW_LOCK_NOT_LOCKED);
++ return(TRUE);
+ }
+
+ /**********************************************************************
+@@ -448,47 +578,30 @@
+ ulint line) /* in: line where requested */
+ {
+ ulint index; /* index of the reserved wait cell */
+- ulint state; /* lock state acquired */
+ ulint i; /* spin round count */
++ ibool spinning = FALSE;
+
+ ut_ad(rw_lock_validate(lock));
+
+-lock_loop:
+- /* Acquire the mutex protecting the rw-lock fields */
+- mutex_enter_fast(&(lock->mutex));
+-
+- state = rw_lock_x_lock_low(lock, pass, file_name, line);
++ i = 0;
+
+- mutex_exit(&(lock->mutex));
++lock_loop:
+
+- if (state == RW_LOCK_EX) {
++ if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
++ rw_x_spin_round_count += i;
+
+ return; /* Locking succeeded */
+
+- } else if (state == RW_LOCK_NOT_LOCKED) {
+-
+- /* Spin waiting for the writer field to become free */
+- i = 0;
+-
+- while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
+- && i < SYNC_SPIN_ROUNDS) {
+- if (srv_spin_wait_delay) {
+- ut_delay(ut_rnd_interval(0,
+- srv_spin_wait_delay));
+- }
++ } else {
+
+- i++;
+- }
+- if (i == SYNC_SPIN_ROUNDS) {
+- os_thread_yield();
++ if (!spinning) {
++ spinning = TRUE;
++ rw_x_spin_wait_count++;
+ }
+- } else if (state == RW_LOCK_WAIT_EX) {
+
+- /* Spin waiting for the reader count field to become zero */
+- i = 0;
+-
+- while (rw_lock_get_reader_count(lock) != 0
+- && i < SYNC_SPIN_ROUNDS) {
++ /* Spin waiting for the lock_word to become free */
++ while (i < SYNC_SPIN_ROUNDS
++ && lock->lock_word <= 0) {
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0,
+ srv_spin_wait_delay));
+@@ -498,12 +611,13 @@
+ }
+ if (i == SYNC_SPIN_ROUNDS) {
+ os_thread_yield();
++ } else {
++ goto lock_loop;
+ }
+- } else {
+- i = 0; /* Eliminate a compiler warning */
+- ut_error;
+ }
+
++ rw_x_spin_round_count += i;
++
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+ "Thread %lu spin wait rw-x-lock at %p cfile %s cline %lu rnds %lu\n",
+@@ -511,39 +625,20 @@
+ lock->cfile_name, (ulong) lock->cline, (ulong) i);
+ }
+
+- rw_x_spin_wait_count++;
+-
+- /* We try once again to obtain the lock. Acquire the mutex protecting
+- the rw-lock fields */
+-
+- mutex_enter(rw_lock_get_mutex(lock));
+-
+- state = rw_lock_x_lock_low(lock, pass, file_name, line);
+-
+- if (state == RW_LOCK_EX) {
+- mutex_exit(rw_lock_get_mutex(lock));
+-
+- return; /* Locking succeeded */
+- }
+-
+- rw_x_system_call_count++;
+-
+ sync_array_reserve_cell(sync_primary_wait_array,
+ lock,
+-#ifdef __WIN__
+- /* On windows RW_LOCK_WAIT_EX signifies
+- that this thread should wait on the
+- special wait_ex_event. */
+- (state == RW_LOCK_WAIT_EX)
+- ? RW_LOCK_WAIT_EX :
+-#endif
+ RW_LOCK_EX,
+ file_name, line,
+ &index);
+
+- rw_lock_set_waiters(lock, 1);
+-
+- mutex_exit(rw_lock_get_mutex(lock));
++ /* Waiters must be set before checking lock_word, to ensure signal
++ is sent. This could lead to a few unnecessary wake-up signals. */
++ rw_lock_set_waiter_flag(lock);
++
++ if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
++ sync_array_free_cell(sync_primary_wait_array, index);
++ return; /* Locking succeeded */
++ }
+
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+@@ -552,11 +647,13 @@
+ lock->cfile_name, (ulong) lock->cline);
+ }
+
+- rw_x_system_call_count++;
++ /* these stats may not be accurate */
++ lock->count_os_wait++;
+ rw_x_os_wait_count++;
+
+ sync_array_wait_event(sync_primary_wait_array, index);
+
++ i = 0;
+ goto lock_loop;
+ }
+
+@@ -697,7 +794,9 @@
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint level) /* in: level */
+ {
++#ifdef UNIV_SYNC_DEBUG
+ lock->level = level;
++#endif /* UNIV_SYNC_DEBUG */
+ }
+
+ #ifdef UNIV_SYNC_DEBUG
+@@ -718,7 +817,7 @@
+ ut_ad(lock);
+ ut_ad(rw_lock_validate(lock));
+
+- mutex_enter(&(lock->mutex));
++ rw_lock_debug_mutex_enter();
+
+ info = UT_LIST_GET_FIRST(lock->debug_list);
+
+@@ -728,7 +827,7 @@
+ && (info->pass == 0)
+ && (info->lock_type == lock_type)) {
+
+- mutex_exit(&(lock->mutex));
++ rw_lock_debug_mutex_exit();
+ /* Found! */
+
+ return(TRUE);
+@@ -736,7 +835,7 @@
+
+ info = UT_LIST_GET_NEXT(list, info);
+ }
+- mutex_exit(&(lock->mutex));
++ rw_lock_debug_mutex_exit();
+
+ return(FALSE);
+ }
+@@ -758,22 +857,18 @@
+ ut_ad(lock);
+ ut_ad(rw_lock_validate(lock));
+
+- mutex_enter(&(lock->mutex));
+-
+ if (lock_type == RW_LOCK_SHARED) {
+- if (lock->reader_count > 0) {
++ if (rw_lock_get_reader_count(lock) > 0) {
+ ret = TRUE;
+ }
+ } else if (lock_type == RW_LOCK_EX) {
+- if (lock->writer == RW_LOCK_EX) {
++ if (rw_lock_get_writer(lock) == RW_LOCK_EX) {
+ ret = TRUE;
+ }
+ } else {
+ ut_error;
+ }
+
+- mutex_exit(&(lock->mutex));
+-
+ return(ret);
+ }
+
+@@ -801,11 +896,10 @@
+
+ count++;
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_enter(&(lock->mutex));
+-
+- if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+- || (rw_lock_get_reader_count(lock) != 0)
+- || (rw_lock_get_waiters(lock) != 0)) {
++#endif
++ if (lock->lock_word != X_LOCK_DECR) {
+
+ fprintf(stderr, "RW-LOCK: %p ", lock);
+
+@@ -821,8 +915,10 @@
+ info = UT_LIST_GET_NEXT(list, info);
+ }
+ }
+-
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(&(lock->mutex));
++#endif
++
+ lock = UT_LIST_GET_NEXT(list, lock);
+ }
+
+@@ -845,9 +941,10 @@
+ "RW-LATCH INFO\n"
+ "RW-LATCH: %p ", lock);
+
+- if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+- || (rw_lock_get_reader_count(lock) != 0)
+- || (rw_lock_get_waiters(lock) != 0)) {
++#ifndef HAVE_ATOMIC_BUILTINS
++ mutex_enter(&(lock->mutex));
++#endif
++ if (lock->lock_word != X_LOCK_DECR) {
+
+ if (rw_lock_get_waiters(lock)) {
+ fputs(" Waiters for the lock exist\n", stderr);
+@@ -861,6 +958,9 @@
+ info = UT_LIST_GET_NEXT(list, info);
+ }
+ }
++#ifndef HAVE_ATOMIC_BUILTINS
++ mutex_exit(&(lock->mutex));
++#endif
+ }
+
+ /*************************************************************************
+@@ -909,14 +1009,11 @@
+ lock = UT_LIST_GET_FIRST(rw_lock_list);
+
+ while (lock != NULL) {
+- mutex_enter(rw_lock_get_mutex(lock));
+
+- if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+- || (rw_lock_get_reader_count(lock) != 0)) {
++ if (lock->lock_word != X_LOCK_DECR) {
+ count++;
+ }
+
+- mutex_exit(rw_lock_get_mutex(lock));
+ lock = UT_LIST_GET_NEXT(list, lock);
+ }
+
+diff -ruN a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c
+--- a/innobase/sync/sync0sync.c 2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/sync/sync0sync.c 2009-10-22 15:18:44.000000000 +0900
+@@ -1,8 +1,31 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ Mutex, the basic synchronization primitive
+
+-(c) 1995 Innobase Oy
+-
+ Created 9/5/1995 Heikki Tuuri
+ *******************************************************/
+
+@@ -140,17 +163,12 @@
+
+ ulint sync_dummy = 0;
+
+-/* The number of system calls made in this module. Intended for performance
+-monitoring. */
+-
+-ulint mutex_system_call_count = 0;
+-
+ /* Number of spin waits on mutexes: for performance monitoring */
+
+-ulint mutex_spin_round_count = 0;
+-ulint mutex_spin_wait_count = 0;
+-ulint mutex_os_wait_count = 0;
+-ulint mutex_exit_count = 0;
++ib_longlong mutex_spin_round_count = 0;
++ib_longlong mutex_spin_wait_count = 0;
++ib_longlong mutex_os_wait_count = 0;
++ib_longlong mutex_exit_count = 0;
+
+ /* The global array of wait cells for implementation of the database's own
+ mutexes and read-write locks */
+@@ -240,6 +258,8 @@
+ {
+ #if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
+ mutex_reset_lock_word(mutex);
++#elif defined(HAVE_ATOMIC_BUILTINS)
++ mutex_reset_lock_word(mutex);
+ #else
+ os_fast_mutex_init(&(mutex->os_fast_mutex));
+ mutex->lock_word = 0;
+@@ -325,7 +345,9 @@
+
+ os_event_free(mutex->event);
+
+-#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
++#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
++#elif defined(HAVE_ATOMIC_BUILTINS)
++#else
+ os_fast_mutex_free(&(mutex->os_fast_mutex));
+ #endif
+ /* If we free the mutex protecting the mutex list (freeing is
+@@ -421,6 +443,12 @@
+ #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+ ut_ad(mutex);
+
++ /* This update is not thread safe, but we don't mind if the count
++ isn't exact. Moved out of the ifdef that follows because we are
++ willing to pay the cost of counting, as the data is valuable.
++ Count the number of calls to mutex_spin_wait. */
++ mutex_spin_wait_count++;
++
+ mutex_loop:
+
+ i = 0;
+@@ -433,7 +461,6 @@
+
+ spin_loop:
+ #if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+- mutex_spin_wait_count++;
+ mutex->count_spin_loop++;
+ #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+
+@@ -502,8 +529,6 @@
+ sync_array_reserve_cell(sync_primary_wait_array, mutex,
+ SYNC_MUTEX, file_name, line, &index);
+
+- mutex_system_call_count++;
+-
+ /* The memory order of the array reservation and the change in the
+ waiters field is important: when we suspend a thread, we first
+ reserve the cell and then set waiters field to 1. When threads are
+@@ -551,7 +576,6 @@
+ mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
+ #endif
+
+- mutex_system_call_count++;
+ mutex_os_wait_count++;
+
+ #ifndef UNIV_HOTBACKUP
+@@ -1368,20 +1392,31 @@
+ FILE* file) /* in: file where to print */
+ {
+ #ifdef UNIV_SYNC_DEBUG
+- fprintf(stderr, "Mutex exits %lu, rws exits %lu, rwx exits %lu\n",
++ fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n",
+ mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
+ #endif
+
+ fprintf(file,
+-"Mutex spin waits %lu, rounds %lu, OS waits %lu\n"
+-"RW-shared spins %lu, OS waits %lu; RW-excl spins %lu, OS waits %lu\n",
+- (ulong) mutex_spin_wait_count,
+- (ulong) mutex_spin_round_count,
+- (ulong) mutex_os_wait_count,
+- (ulong) rw_s_spin_wait_count,
+- (ulong) rw_s_os_wait_count,
+- (ulong) rw_x_spin_wait_count,
+- (ulong) rw_x_os_wait_count);
++ "Mutex spin waits %llu, rounds %llu, OS waits %llu\n"
++ "RW-shared spins %llu, OS waits %llu;"
++ " RW-excl spins %llu, OS waits %llu\n",
++ mutex_spin_wait_count,
++ mutex_spin_round_count,
++ mutex_os_wait_count,
++ rw_s_spin_wait_count,
++ rw_s_os_wait_count,
++ rw_x_spin_wait_count,
++ rw_x_os_wait_count);
++
++ fprintf(file,
++ "Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
++ "%.2f RW-excl\n",
++ (double) mutex_spin_round_count /
++ (mutex_spin_wait_count ? mutex_spin_wait_count : 1),
++ (double) rw_s_spin_round_count /
++ (rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
++ (double) rw_x_spin_round_count /
++ (rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
+ }
+
+ /***********************************************************************
+diff -ruN a/patch_info/innodb_rw_lock.info b/patch_info/innodb_rw_lock.info
+--- /dev/null 1970-01-01 09:00:00.000000000 +0900
++++ b/patch_info/innodb_rw_lock.info 2009-10-22 15:18:30.000000000 +0900
+@@ -0,0 +1,6 @@
++File=innodb_rw_lock.patch
++Name=Fix of InnoDB rw_locks ported from InnoDB Plugin
++Version=1.0
++Author=Innobase Oy.
++License=GPL
++Comment=