xref: /oneTBB/src/tbb/queuing_rw_mutex.cpp (revision b15aabb3)
/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

/** Before making any changes in the implementation, please emulate algorithmic changes
    with the SPIN tool using <TBB directory>/tools/spin_models/ReaderWriterMutex.pml.
    Some code may look as if it "can be restructured", but its structure does matter! */

#include "oneapi/tbb/queuing_rw_mutex.h"
#include "oneapi/tbb/detail/_assert.h"
#include "oneapi/tbb/detail/_utils.h"
#include "itt_notify.h"

namespace tbb {
namespace detail {
namespace r1 {

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Workaround for overzealous compiler warnings
    #pragma warning (push)
    #pragma warning (disable: 4311 4312)
#endif

//! A view of a T* with additional functionality for twiddling low-order bits.
template<typename T>
class tricky_atomic_pointer {
public:
    using word = uintptr_t;

    static T* fetch_add( std::atomic<word>& location, word addend, std::memory_order memory_order ) {
        return reinterpret_cast<T*>(location.fetch_add(addend, memory_order));
    }

    static T* exchange( std::atomic<word>& location, T* value, std::memory_order memory_order ) {
        return reinterpret_cast<T*>(location.exchange(reinterpret_cast<word>(value), memory_order));
    }

    static T* compare_exchange_strong( std::atomic<word>& obj, const T* expected, const T* desired, std::memory_order memory_order ) {
        word expd = reinterpret_cast<word>(expected);
        obj.compare_exchange_strong(expd, reinterpret_cast<word>(desired), memory_order);
        return reinterpret_cast<T*>(expd);
    }

    static void store( std::atomic<word>& location, const T* value, std::memory_order memory_order ) {
        location.store(reinterpret_cast<word>(value), memory_order);
    }

    static T* load( std::atomic<word>& location, std::memory_order memory_order ) {
        return reinterpret_cast<T*>(location.load(memory_order));
    }

    static void spin_wait_while_eq(const std::atomic<word>& location, const T* value) {
        tbb::detail::d0::spin_wait_while_eq(location, reinterpret_cast<word>(value));
    }

    T* & ref;
    tricky_atomic_pointer( T*& original ) : ref(original) {}
    tricky_atomic_pointer(const tricky_atomic_pointer&) = delete;
    tricky_atomic_pointer& operator=(const tricky_atomic_pointer&) = delete;
    T* operator&( const word operand2 ) const {
        return reinterpret_cast<T*>( reinterpret_cast<word>(ref) & operand2 );
    }
    T* operator|( const word operand2 ) const {
        return reinterpret_cast<T*>( reinterpret_cast<word>(ref) | operand2 );
    }
};

using tricky_pointer = tricky_atomic_pointer<queuing_rw_mutex::scoped_lock>;

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Workaround for overzealous compiler warnings
    #pragma warning (pop)
#endif
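
// Illustrative sketch (added commentary, not from the original sources): the low-order bit of a
// queue-node pointer is used as a tag, which relies on scoped_lock objects being at least
// 2-byte aligned so that bit 0 of a valid pointer is always zero. With FLAG (defined below) == 0x1:
//
//     queuing_rw_mutex::scoped_lock* p = /* some aligned node */;
//     auto tagged   = tricky_pointer(p) | FLAG;          // same address, tag bit set
//     auto untagged = tricky_pointer(tagged) & ~FLAG;    // recovers the original pointer
//     bool has_tag  = (reinterpret_cast<uintptr_t>(tagged) & FLAG) != 0;   // true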

//! Flag bits in a state_t that specify information about a locking request.
enum state_t_flags : unsigned char {
    STATE_NONE                   = 0,
    STATE_WRITER                 = 1<<0,
    STATE_READER                 = 1<<1,
    STATE_READER_UNBLOCKNEXT     = 1<<2,
    STATE_ACTIVEREADER           = 1<<3,
    STATE_UPGRADE_REQUESTED      = 1<<4,
    STATE_UPGRADE_WAITING        = 1<<5,
    STATE_UPGRADE_LOSER          = 1<<6,
    STATE_COMBINED_WAITINGREADER = STATE_READER | STATE_READER_UNBLOCKNEXT,
    STATE_COMBINED_READER        = STATE_COMBINED_WAITINGREADER | STATE_ACTIVEREADER,
    STATE_COMBINED_UPGRADING     = STATE_UPGRADE_WAITING | STATE_UPGRADE_LOSER
};
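
// Added commentary (not from the original sources): in the code below a reader enqueues as
// STATE_READER, may be switched to STATE_READER_UNBLOCKNEXT by its successor, and becomes
// STATE_ACTIVEREADER once it holds the lock. A writer enqueues and remains STATE_WRITER.
// An upgrading reader goes STATE_ACTIVEREADER -> STATE_UPGRADE_REQUESTED -> STATE_UPGRADE_WAITING
// and finally STATE_WRITER; it is marked STATE_UPGRADE_LOSER if a competing upgrader won, in
// which case upgrade_to_writer() reports that the lock was released and reacquired.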

static const unsigned char RELEASED = 0;
static const unsigned char ACQUIRED = 1;

struct queuing_rw_mutex_impl {
    //! Try to acquire the internal lock
    /** Returns true if lock was successfully acquired. */
    static bool try_acquire_internal_lock(d1::queuing_rw_mutex::scoped_lock& s)
    {
        auto expected = RELEASED;
        return s.my_internal_lock.compare_exchange_strong(expected, ACQUIRED);
    }

    //! Acquire the internal lock
    static void acquire_internal_lock(d1::queuing_rw_mutex::scoped_lock& s)
    {
        // Usually, we would use the test-test-and-set idiom here, with exponential backoff.
        // But so far, experiments indicate there is no value in doing so here.
        while( !try_acquire_internal_lock(s) ) {
            machine_pause(1);
        }
    }

    //! Release the internal lock
    static void release_internal_lock(d1::queuing_rw_mutex::scoped_lock& s)
    {
        s.my_internal_lock.store(RELEASED, std::memory_order_release);
    }

    //! Wait for internal lock to be released
    static void wait_for_release_of_internal_lock(d1::queuing_rw_mutex::scoped_lock& s)
    {
        spin_wait_until_eq(s.my_internal_lock, RELEASED);
    }

    //! A helper function
    static void unblock_or_wait_on_internal_lock(d1::queuing_rw_mutex::scoped_lock& s, uintptr_t flag ) {
        if( flag ) {
            wait_for_release_of_internal_lock(s);
        }
        else {
            release_internal_lock(s);
        }
    }
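
    // Added commentary (not from the original sources): `flag` is the tag bit recovered from a
    // neighbour's pointer field. A non-zero flag means the neighbour observed the "corrupted"
    // (tagged) pointer and therefore took responsibility for releasing this node's internal lock,
    // so this thread only waits for that release; otherwise it releases the internal lock itself.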

    //! Mask for low order bit of a pointer.
    static const tricky_pointer::word FLAG = 0x1;

    static uintptr_t get_flag( d1::queuing_rw_mutex::scoped_lock* ptr ) {
        return reinterpret_cast<uintptr_t>(ptr) & FLAG;
    }
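
    // For example (added note): get_flag(tricky_pointer(p) | FLAG) == FLAG, while get_flag(p) == 0
    // for an untagged node pointer; the result feeds unblock_or_wait_on_internal_lock() above.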

    //------------------------------------------------------------------------
    // Methods of queuing_rw_mutex::scoped_lock
    //------------------------------------------------------------------------

    //! A method to acquire queuing_rw_mutex lock
    static void acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write)
    {
        __TBB_ASSERT( !s.my_mutex, "scoped_lock is already holding a mutex");

        // Must set all fields before the exchange, because once the
        // exchange executes, *this becomes accessible to other threads.
        s.my_mutex = &m;
        s.my_prev.store(0U, std::memory_order_relaxed);
        s.my_next.store(0U, std::memory_order_relaxed);
        s.my_going.store(0U, std::memory_order_relaxed);
        s.my_state.store(d1::queuing_rw_mutex::scoped_lock::state_t(write ? STATE_WRITER : STATE_READER), std::memory_order_relaxed);
        s.my_internal_lock.store(RELEASED, std::memory_order_relaxed);

        queuing_rw_mutex::scoped_lock* predecessor = m.q_tail.exchange(&s, std::memory_order_release);

        if( write ) {       // Acquiring for write

            if( predecessor ) {
                ITT_NOTIFY(sync_prepare, s.my_mutex);
                predecessor = tricky_pointer(predecessor) & ~FLAG;
                __TBB_ASSERT( !( tricky_pointer(predecessor) & FLAG ), "use of corrupted pointer!" );
    #if TBB_USE_ASSERT
                atomic_fence(std::memory_order_seq_cst); // on "m.q_tail"
                __TBB_ASSERT( !predecessor->my_next, "the predecessor has another successor!");
    #endif
                tricky_pointer::store(predecessor->my_next, &s, std::memory_order_release);
                spin_wait_until_eq(s.my_going, 1U);
            }

        } else {            // Acquiring for read
    #if __TBB_USE_ITT_NOTIFY
            bool sync_prepare_done = false;
    #endif
            if( predecessor ) {
                unsigned char pred_state;
                __TBB_ASSERT( !s.my_prev, "the predecessor is already set" );
                if( tricky_pointer(predecessor) & FLAG ) {
                    /* this is only possible if predecessor is an upgrading reader and it signals us to wait */
                    pred_state = STATE_UPGRADE_WAITING;
                    predecessor = tricky_pointer(predecessor) & ~FLAG;
                } else {
                    // Load predecessor->my_state now, because once predecessor->my_next becomes
                    // non-NULL, we must assume that *predecessor might be destroyed.
                    pred_state = STATE_READER;
                    predecessor->my_state.compare_exchange_strong(pred_state, STATE_READER_UNBLOCKNEXT, std::memory_order_acq_rel);
                }
                tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed);
                __TBB_ASSERT( !( tricky_pointer(predecessor) & FLAG ), "use of corrupted pointer!" );
    #if TBB_USE_ASSERT
                atomic_fence(std::memory_order_seq_cst); // on "m.q_tail"
                __TBB_ASSERT( !predecessor->my_next, "the predecessor has another successor!");
    #endif
                tricky_pointer::store(predecessor->my_next, &s, std::memory_order_release);
                if( pred_state != STATE_ACTIVEREADER ) {
    #if __TBB_USE_ITT_NOTIFY
                    sync_prepare_done = true;
                    ITT_NOTIFY(sync_prepare, s.my_mutex);
    #endif
                    spin_wait_until_eq(s.my_going, 1U);
                }
            }

            // The protected state must have been acquired here before it can be further released to any other reader(s):
            unsigned char old_state = STATE_READER;
            s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_acq_rel);
            if( old_state!=STATE_READER ) {
#if __TBB_USE_ITT_NOTIFY
                if( !sync_prepare_done )
                    ITT_NOTIFY(sync_prepare, s.my_mutex);
#endif
                // Failed to become an active reader -> need to unblock the next waiting reader first
                __TBB_ASSERT( s.my_state==STATE_READER_UNBLOCKNEXT, "unexpected state" );
                spin_wait_while_eq(s.my_next, 0U);
                /* my_state must be changed before unblocking the next waiter; otherwise the successor
                   might finish and another thread could observe our old state and be left blocked */
                s.my_state.store(STATE_ACTIVEREADER, std::memory_order_relaxed);
                tricky_pointer::load(s.my_next, std::memory_order_relaxed)->my_going.store(1U, std::memory_order_release);
            }
            __TBB_ASSERT( s.my_state==STATE_ACTIVEREADER, "unlocked reader is active reader" );
        }

        ITT_NOTIFY(sync_acquired, s.my_mutex);

        // Force acquire so that the user's critical section receives correct values
        // from the processor that was previously in the user's critical section.
        atomic_fence(std::memory_order_acquire);
    }
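
    // Illustrative picture (added, not from the original sources) of the queue right after a
    // writer W acquires the lock and a reader R then calls acquire():
    //
    //     m.q_tail == &R          // R is the most recently enqueued node
    //     W.my_next == &R         // set by R so that W can unblock it on release
    //     R.my_prev == &W         // only readers record their predecessor
    //     R.my_going == 0         // R spins in spin_wait_until_eq(R.my_going, 1U) until W releases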

    //! A method to acquire queuing_rw_mutex if it is free
    static bool try_acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write)
    {
        __TBB_ASSERT( !s.my_mutex, "scoped_lock is already holding a mutex");

        if( m.q_tail.load(std::memory_order_relaxed) )
            return false; // Someone already took the lock

        // Must set all fields before the exchange, because once the
        // exchange executes, *this becomes accessible to other threads.
        s.my_prev.store(0U, std::memory_order_relaxed);
        s.my_next.store(0U, std::memory_order_relaxed);
        s.my_going.store(0U, std::memory_order_relaxed); // TODO: remove dead assignment?
        s.my_state.store(d1::queuing_rw_mutex::scoped_lock::state_t(write ? STATE_WRITER : STATE_ACTIVEREADER), std::memory_order_relaxed);
        s.my_internal_lock.store(RELEASED, std::memory_order_relaxed);

        // The CAS must have release semantics, because we are
        // "sending" the fields initialized above to other processors.
        d1::queuing_rw_mutex::scoped_lock* expected = nullptr;
        if( !m.q_tail.compare_exchange_strong(expected, &s, std::memory_order_release) )
            return false; // Someone already took the lock
        // Force acquire so that the user's critical section receives correct values
        // from the processor that was previously in the user's critical section.
        atomic_fence(std::memory_order_acquire);
        s.my_mutex = &m;
        ITT_NOTIFY(sync_acquired, s.my_mutex);
        return true;
    }
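
    // Added note: try_acquire() uses a cheap relaxed read of q_tail as a fast-path filter and relies
    // on the release CAS plus the subsequent acquire fence for the actual synchronization. If the CAS
    // succeeds, the queue was empty and this thread becomes the sole owner immediately (as a writer,
    // or directly as STATE_ACTIVEREADER for a read request).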

    //! A method to release queuing_rw_mutex lock
    static void release(d1::queuing_rw_mutex::scoped_lock& s) {
        __TBB_ASSERT(s.my_mutex!=nullptr, "no lock acquired");

        ITT_NOTIFY(sync_releasing, s.my_mutex);

        if( s.my_state.load(std::memory_order_relaxed) == STATE_WRITER ) { // Acquired for write

            // The logic below is the same as "writerUnlock", but elides
            // "return" from the middle of the routine.
            // In the statement below, acquire semantics for reading my_next are required
            // so that the following operations on the fields of *next are safe.
            d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire);
            if( !next ) {
                d1::queuing_rw_mutex::scoped_lock* expected = &s;
                if( s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, std::memory_order_release) ) {
                    // this was the only item in the queue, and the queue is now empty.
                    goto done;
                }
                spin_wait_while_eq( s.my_next, 0U );
                next = tricky_pointer::load(s.my_next, std::memory_order_acquire);
            }
            next->my_going.store(2U, std::memory_order_relaxed); // protect the next queue node from being destroyed too early
            if( next->my_state==STATE_UPGRADE_WAITING ) {
                // a successor waiting for an upgrade means this writer was itself upgraded earlier.
                acquire_internal_lock(s);
                // Responsibility transition, the one who reads uncorrupted my_prev will do release.
                d1::queuing_rw_mutex::scoped_lock* tmp = tricky_pointer::exchange(next->my_prev, nullptr, std::memory_order_release);
                next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_relaxed);
                next->my_going.store(1U, std::memory_order_release);
                unblock_or_wait_on_internal_lock(s, get_flag(tmp));
            } else {
                // next->state cannot be STATE_UPGRADE_REQUESTED
                __TBB_ASSERT( next->my_state & (STATE_COMBINED_WAITINGREADER | STATE_WRITER), "unexpected state" );
                __TBB_ASSERT( !( next->my_prev.load() & FLAG ), "use of corrupted pointer!" );
                tricky_pointer::store(next->my_prev, nullptr, std::memory_order_relaxed);
                next->my_going.store(1U, std::memory_order_release);
            }

        } else { // Acquired for read

            queuing_rw_mutex::scoped_lock *tmp = nullptr;
    retry:
            // Addition to the original paper: Mark my_prev as in use
            queuing_rw_mutex::scoped_lock *predecessor = tricky_pointer::fetch_add(s.my_prev, FLAG, std::memory_order_acquire);

            if( predecessor ) {
                if( !(try_acquire_internal_lock(*predecessor)) )
                {
                    // Failed to acquire the lock on the predecessor, which is either unlinking or upgrading.
                    // In the latter case, it may or may not have seen my "in use" flag - need to check.
                    // Responsibility transition, the one who reads uncorrupted my_prev will do release.
                    tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor) | FLAG, predecessor, std::memory_order_release);
                    if( !(tricky_pointer(tmp) & FLAG) ) {
                        // Wait for the predecessor to change my_prev (e.g. during unlink)
                        // TODO: spin_wait condition seems never reachable
                        tricky_pointer::spin_wait_while_eq( s.my_prev, tricky_pointer(predecessor)|FLAG );
                        // Now the owner of the predecessor is waiting for _us_ to release its lock
                        release_internal_lock(*predecessor);
                    }
                    // else the "in use" flag is back -> the predecessor didn't get it and will release itself; nothing to do

                    tmp = nullptr;
                    goto retry;
                }
                __TBB_ASSERT(predecessor && predecessor->my_internal_lock.load(std::memory_order_relaxed)==ACQUIRED, "predecessor's lock is not acquired");
                tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed);
                acquire_internal_lock(s);

                tricky_pointer::store(predecessor->my_next, nullptr, std::memory_order_release);

                d1::queuing_rw_mutex::scoped_lock* expected = &s;
                if( !tricky_pointer::load(s.my_next, std::memory_order_relaxed) && !s.my_mutex->q_tail.compare_exchange_strong(expected, predecessor, std::memory_order_release) ) {
                    spin_wait_while_eq( s.my_next, 0U );
                }
                __TBB_ASSERT( !(s.my_next.load() & FLAG), "use of corrupted pointer" );

                // ensure acquire semantics of reading 'my_next'
                if(d1::queuing_rw_mutex::scoped_lock *const l_next = tricky_pointer::load(s.my_next, std::memory_order_acquire) ) { // I->next != nil, TODO: rename to next after clearing up and adapting the n in the comment two lines below
                    // Equivalent to I->next->prev = I->prev but protected against (prev[n]&FLAG)!=0
                    tmp = tricky_pointer::exchange(l_next->my_prev, predecessor, std::memory_order_release);
                    // I->prev->next = I->next;
                    __TBB_ASSERT(tricky_pointer::load(s.my_prev, std::memory_order_relaxed)==predecessor, nullptr);
                    predecessor->my_next.store(s.my_next.load(std::memory_order_relaxed), std::memory_order_release);
                }
                // Safe to release in the order opposite to acquiring, which makes the code simpler
                release_internal_lock(*predecessor);

            } else { // No predecessor when we looked
                acquire_internal_lock(s);  // "exclusiveLock(&I->EL)"
                d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire);
                if( !next ) {
                    d1::queuing_rw_mutex::scoped_lock* expected = &s;
                    if( !s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, std::memory_order_release) ) {
                        spin_wait_while_eq( s.my_next, 0U );
                        next = tricky_pointer::load(s.my_next, std::memory_order_relaxed);
                    } else {
                        goto unlock_self;
                    }
                }
                next->my_going.store(2U, std::memory_order_relaxed);
                // Responsibility transition, the one who reads uncorrupted my_prev will do release.
                tmp = tricky_pointer::exchange(next->my_prev, nullptr, std::memory_order_release);
                next->my_going.store(1U, std::memory_order_release);
            }
    unlock_self:
            unblock_or_wait_on_internal_lock(s, get_flag(tmp));
        }
    done:
        spin_wait_while_eq( s.my_going, 2U );

        s.initialize();
    }
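
    // Added note: unlike a writer, a reader may release from the middle of the queue, so the code
    // above unlinks its node while neighbours can concurrently be releasing or upgrading. The
    // per-node internal lock together with the FLAG bit in my_prev/my_next implements that
    // hand-over (see the "Responsibility transition" comments).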

    static bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock& s) {
        if ( s.my_state.load(std::memory_order_relaxed) == STATE_ACTIVEREADER ) return true; // Already a reader

        ITT_NOTIFY(sync_releasing, s.my_mutex);
        s.my_state.store(STATE_READER, std::memory_order_relaxed);
        if( ! tricky_pointer::load(s.my_next, std::memory_order_relaxed)) {
            // the following load of q_tail must not be reordered with setting STATE_READER above
            if( &s==s.my_mutex->q_tail.load() ) {
                unsigned char old_state = STATE_READER;
                s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_release);
                if( old_state==STATE_READER )
                    return true; // Downgrade completed
            }
            /* wait for the successor to register itself */
            spin_wait_while_eq( s.my_next, 0U );
        }
        d1::queuing_rw_mutex::scoped_lock *const next = tricky_pointer::load(s.my_next, std::memory_order_acquire);
        __TBB_ASSERT( next, "still no successor at this point!" );
        if( next->my_state & STATE_COMBINED_WAITINGREADER )
            next->my_going.store(1U, std::memory_order_release);
        else if( next->my_state==STATE_UPGRADE_WAITING )
            // a successor waiting for an upgrade means this writer was itself upgraded earlier.
            next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_relaxed);
        s.my_state.store(STATE_ACTIVEREADER, std::memory_order_relaxed);
        return true;
    }

    static bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) {
        if ( s.my_state.load(std::memory_order_relaxed) == STATE_WRITER ) return true; // Already a writer

        __TBB_ASSERT( s.my_state==STATE_ACTIVEREADER, "only an active reader can be upgraded" );

        queuing_rw_mutex::scoped_lock * tmp;
        queuing_rw_mutex::scoped_lock * me = &s;

        ITT_NOTIFY(sync_releasing, s.my_mutex);
        s.my_state.store(STATE_UPGRADE_REQUESTED, std::memory_order_relaxed);
    requested:
        __TBB_ASSERT( !(s.my_next.load() & FLAG), "use of corrupted pointer!" );
        acquire_internal_lock(s);
        d1::queuing_rw_mutex::scoped_lock* expected = &s;
        if( !s.my_mutex->q_tail.compare_exchange_strong(expected, tricky_pointer(me)|FLAG, std::memory_order_release) ) {
            spin_wait_while_eq( s.my_next, 0U );
            queuing_rw_mutex::scoped_lock * next;
            next = tricky_pointer::fetch_add(s.my_next, FLAG, std::memory_order_acquire);
            unsigned short n_state = next->my_state;
            /* the next reader may be blocked by our state; the best thing to do is to unblock it */
            if( n_state & STATE_COMBINED_WAITINGREADER )
                next->my_going.store(1U, std::memory_order_release);
            // Responsibility transition, the one who reads uncorrupted my_prev will do release.
            tmp = tricky_pointer::exchange(next->my_prev, &s, std::memory_order_release);
            unblock_or_wait_on_internal_lock(s, get_flag(tmp));
            if( n_state & (STATE_COMBINED_READER | STATE_UPGRADE_REQUESTED) ) {
                // save next|FLAG for simplicity of the following comparisons
                tmp = tricky_pointer(next)|FLAG;
                for( atomic_backoff b; tricky_pointer::load(s.my_next, std::memory_order_relaxed)==tmp; b.pause() ) {
                    if( s.my_state & STATE_COMBINED_UPGRADING ) {
                        if( tricky_pointer::load(s.my_next, std::memory_order_acquire)==tmp )
                            tricky_pointer::store(s.my_next, next, std::memory_order_relaxed);
                        goto waiting;
                    }
                }
                __TBB_ASSERT(tricky_pointer::load(s.my_next, std::memory_order_relaxed) != (tricky_pointer(next)|FLAG), nullptr);
                goto requested;
            } else {
                __TBB_ASSERT( n_state & (STATE_WRITER | STATE_UPGRADE_WAITING), "unexpected state");
                __TBB_ASSERT( (tricky_pointer(next)|FLAG) == tricky_pointer::load(s.my_next, std::memory_order_relaxed), nullptr);
                tricky_pointer::store(s.my_next, next, std::memory_order_relaxed);
            }
        } else {
            /* We are in the tail; whoever comes next is blocked by q_tail&FLAG */
            release_internal_lock(s);
        } // if( this != my_mutex->q_tail... )
        {
            unsigned char old_state = STATE_UPGRADE_REQUESTED;
            s.my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_acquire);
        }
    waiting:
        __TBB_ASSERT( !( s.my_next.load(std::memory_order_relaxed) & FLAG ), "use of corrupted pointer!" );
        __TBB_ASSERT( s.my_state & STATE_COMBINED_UPGRADING, "wrong state at upgrade waiting_retry" );
        __TBB_ASSERT( me==&s, nullptr );
        ITT_NOTIFY(sync_prepare, s.my_mutex);
        /* if no one was blocked by the "corrupted" q_tail, turn it back */
        expected = tricky_pointer(me)|FLAG;
        s.my_mutex->q_tail.compare_exchange_strong(expected, &s, std::memory_order_release);
        queuing_rw_mutex::scoped_lock * predecessor;
        // Mark my_prev as 'in use' to prevent the predecessor from releasing
        predecessor = tricky_pointer::fetch_add(s.my_prev, FLAG, std::memory_order_acquire);
        if( predecessor ) {
            bool success = try_acquire_internal_lock(*predecessor);
            {
                // While the predecessor pointer (my_prev) is in use (FLAG is set), we can safely update the node's state.
                // A corrupted pointer transfers the responsibility for releasing the predecessor's node to us.
                unsigned char old_state = STATE_UPGRADE_REQUESTED;
                predecessor->my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_release);
            }
            if( !success ) {
                // Responsibility transition, the one who reads uncorrupted my_prev will do release.
                tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor)|FLAG, predecessor, std::memory_order_release);
                if( tricky_pointer(tmp) & FLAG ) {
                    tricky_pointer::spin_wait_while_eq(s.my_prev, predecessor);
                    predecessor = tricky_pointer::load(s.my_prev, std::memory_order_relaxed);
                } else {
                    // TODO: spin_wait condition seems never reachable
                    tricky_pointer::spin_wait_while_eq(s.my_prev, tricky_pointer(predecessor)|FLAG);
                    release_internal_lock(*predecessor);
                }
            } else {
                tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed);
                release_internal_lock(*predecessor);
                tricky_pointer::spin_wait_while_eq(s.my_prev, predecessor);
                predecessor = tricky_pointer::load(s.my_prev, std::memory_order_relaxed);
            }
            if( predecessor )
                goto waiting;
        } else {
            tricky_pointer::store(s.my_prev, nullptr, std::memory_order_relaxed);
        }
        __TBB_ASSERT( !predecessor && !s.my_prev, nullptr );

        // additional checks to prevent lifetime issues
        // wait for the successor to finish working with my fields
        wait_for_release_of_internal_lock(s);
        // now wait for the predecessor to finish working with my fields
        spin_wait_while_eq( s.my_going, 2U );

        // Acquire critical section indirectly from previous owner or directly from predecessor (TODO: not clear).
        atomic_fence(std::memory_order_acquire); // on either "my_mutex->q_tail" or "my_going" (TODO: not clear)

        bool result = ( s.my_state != STATE_UPGRADE_LOSER );
        s.my_state.store(STATE_WRITER, std::memory_order_relaxed);
        s.my_going.store(1U, std::memory_order_relaxed);

        ITT_NOTIFY(sync_acquired, s.my_mutex);
        return result;
    }
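
    // Added note: the return value mirrors the upgrade_to_writer() contract as implemented here:
    // true when the upgrade happened without ever losing the lock, false (the node was marked
    // STATE_UPGRADE_LOSER) when a competing upgrader won and this thread effectively released and
    // reacquired the mutex before becoming a writer.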

    static void construct(d1::queuing_rw_mutex& m) {
        suppress_unused_warning(m);
        ITT_SYNC_CREATE(&m, _T("tbb::queuing_rw_mutex"), _T(""));
    }
};

void __TBB_EXPORTED_FUNC acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) {
    queuing_rw_mutex_impl::acquire(m, s, write);
}

bool __TBB_EXPORTED_FUNC try_acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) {
    return queuing_rw_mutex_impl::try_acquire(m, s, write);
}

void __TBB_EXPORTED_FUNC release(d1::queuing_rw_mutex::scoped_lock& s) {
    queuing_rw_mutex_impl::release(s);
}

bool __TBB_EXPORTED_FUNC upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) {
    return queuing_rw_mutex_impl::upgrade_to_writer(s);
}

bool __TBB_EXPORTED_FUNC downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock& s) {
    return queuing_rw_mutex_impl::downgrade_to_reader(s);
}

void __TBB_EXPORTED_FUNC construct(d1::queuing_rw_mutex& m) {
    queuing_rw_mutex_impl::construct(m);
}
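
// A minimal usage sketch (added; assumes the public oneapi::tbb::queuing_rw_mutex API, whose
// scoped_lock dispatches to the r1 entry points above):
//
//     tbb::queuing_rw_mutex m;
//     {
//         tbb::queuing_rw_mutex::scoped_lock lock(m, /*write=*/false);   // acquire as a reader
//         // ... read the shared data ...
//         if( lock.upgrade_to_writer() ) {
//             // the lock was held continuously while upgrading
//         } else {
//             // the lock was released and reacquired; re-validate before writing
//         }
//         // ... write the shared data ...
//     }   // released on scope exit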

} // namespace r1
} // namespace detail
} // namespace tbb