151c0b2f7Stbbdev /*
2*c21e688aSSergey Zheltov     Copyright (c) 2005-2022 Intel Corporation
351c0b2f7Stbbdev 
451c0b2f7Stbbdev     Licensed under the Apache License, Version 2.0 (the "License");
551c0b2f7Stbbdev     you may not use this file except in compliance with the License.
651c0b2f7Stbbdev     You may obtain a copy of the License at
751c0b2f7Stbbdev 
851c0b2f7Stbbdev         http://www.apache.org/licenses/LICENSE-2.0
951c0b2f7Stbbdev 
1051c0b2f7Stbbdev     Unless required by applicable law or agreed to in writing, software
1151c0b2f7Stbbdev     distributed under the License is distributed on an "AS IS" BASIS,
1251c0b2f7Stbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1351c0b2f7Stbbdev     See the License for the specific language governing permissions and
1451c0b2f7Stbbdev     limitations under the License.
1551c0b2f7Stbbdev */
1651c0b2f7Stbbdev 
1749e08aacStbbdev #include "oneapi/tbb/detail/_config.h"
1849e08aacStbbdev #include "oneapi/tbb/detail/_assert.h"
1951c0b2f7Stbbdev #include "../tbb/assert_impl.h"
2051c0b2f7Stbbdev 
2151c0b2f7Stbbdev #if !__TBB_WIN8UI_SUPPORT && defined(_WIN32)
2251c0b2f7Stbbdev 
2351c0b2f7Stbbdev #ifndef _CRT_SECURE_NO_DEPRECATE
2451c0b2f7Stbbdev #define _CRT_SECURE_NO_DEPRECATE 1
2551c0b2f7Stbbdev #endif
2651c0b2f7Stbbdev 
2751c0b2f7Stbbdev // no standard-conforming implementation of snprintf prior to VS 2015
2851c0b2f7Stbbdev #if !defined(_MSC_VER) || _MSC_VER>=1900
2951c0b2f7Stbbdev #define LOG_PRINT(s, n, format, ...) snprintf(s, n, format, __VA_ARGS__)
3051c0b2f7Stbbdev #else
3151c0b2f7Stbbdev #define LOG_PRINT(s, n, format, ...) _snprintf_s(s, n, _TRUNCATE, format, __VA_ARGS__)
3251c0b2f7Stbbdev #endif
3351c0b2f7Stbbdev 
3451c0b2f7Stbbdev #include <windows.h>
3551c0b2f7Stbbdev #include <new>
3651c0b2f7Stbbdev #include <stdio.h>
3751c0b2f7Stbbdev #include <string.h>
3851c0b2f7Stbbdev 
3951c0b2f7Stbbdev #include "function_replacement.h"
4051c0b2f7Stbbdev 
// Identifies one standard memory allocation routine in the replacement log:
// the function that was inspected and the DLL it was looked up in.
// The struct only stores the two pointers; it does not copy the strings.
struct FunctionInfo {
    const char *funcName;   // name of the inspected function
    const char *dllName;    // name of the DLL the function belongs to
};
4651c0b2f7Stbbdev 
4751c0b2f7Stbbdev // Namespace that processes and manages the output of records to the Log journal
4851c0b2f7Stbbdev // that will be provided to user by TBB_malloc_replacement_log()
4951c0b2f7Stbbdev namespace Log {
5051c0b2f7Stbbdev     // Value of RECORDS_COUNT is set due to the fact that we maximally
5151c0b2f7Stbbdev     // scan 8 modules, and in every module we can swap 6 opcodes. (rounded to 8)
5251c0b2f7Stbbdev     static const unsigned RECORDS_COUNT = 8 * 8;
5351c0b2f7Stbbdev     static const unsigned RECORD_LENGTH = MAX_PATH;
5451c0b2f7Stbbdev 
5557f524caSIlya Isaev     // Need to add 1 to count of records, because last record must be always nullptr
5651c0b2f7Stbbdev     static char *records[RECORDS_COUNT + 1];
5751c0b2f7Stbbdev     static bool replacement_status = true;
5851c0b2f7Stbbdev 
5951c0b2f7Stbbdev     // Internal counter that contains number of next string for record
6051c0b2f7Stbbdev     static unsigned record_number = 0;
6151c0b2f7Stbbdev 
6251c0b2f7Stbbdev     // Function that writes info about (not)found opcodes to the Log journal
6351c0b2f7Stbbdev     // functionInfo - information about a standard memory allocation function for the replacement log
6451c0b2f7Stbbdev     // opcodeString - string, that contain byte code of this function
6551c0b2f7Stbbdev     // status - information about function replacement status
record(FunctionInfo functionInfo,const char * opcodeString,bool status)6651c0b2f7Stbbdev     static void record(FunctionInfo functionInfo, const char * opcodeString, bool status) {
6751c0b2f7Stbbdev         __TBB_ASSERT(functionInfo.dllName, "Empty DLL name value");
6851c0b2f7Stbbdev         __TBB_ASSERT(functionInfo.funcName, "Empty function name value");
6951c0b2f7Stbbdev         __TBB_ASSERT(opcodeString, "Empty opcode");
7051c0b2f7Stbbdev         __TBB_ASSERT(record_number <= RECORDS_COUNT, "Incorrect record number");
7151c0b2f7Stbbdev 
7251c0b2f7Stbbdev         //If some replacement failed -> set status to false
7351c0b2f7Stbbdev         replacement_status &= status;
7451c0b2f7Stbbdev 
7551c0b2f7Stbbdev         // If we reach the end of the log, write this message to the last line
7651c0b2f7Stbbdev         if (record_number == RECORDS_COUNT) {
7751c0b2f7Stbbdev             // %s - workaround to fix empty variable argument parsing behavior in GCC
7851c0b2f7Stbbdev             LOG_PRINT(records[RECORDS_COUNT - 1], RECORD_LENGTH, "%s", "Log was truncated.");
7951c0b2f7Stbbdev             return;
8051c0b2f7Stbbdev         }
8151c0b2f7Stbbdev 
8251c0b2f7Stbbdev         char* entry = (char*)HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, RECORD_LENGTH);
8351c0b2f7Stbbdev         __TBB_ASSERT(entry, "Invalid memory was returned");
8451c0b2f7Stbbdev 
8551c0b2f7Stbbdev         LOG_PRINT(entry, RECORD_LENGTH, "%s: %s (%s), byte pattern: <%s>",
8651c0b2f7Stbbdev             status ? "Success" : "Fail", functionInfo.funcName, functionInfo.dllName, opcodeString);
8751c0b2f7Stbbdev 
8851c0b2f7Stbbdev         records[record_number++] = entry;
8951c0b2f7Stbbdev     }
9051c0b2f7Stbbdev };
9151c0b2f7Stbbdev 
Ptr2Addrint(LPVOID ptr)9251c0b2f7Stbbdev inline UINT_PTR Ptr2Addrint(LPVOID ptr)
9351c0b2f7Stbbdev {
9451c0b2f7Stbbdev     Int2Ptr i2p;
9551c0b2f7Stbbdev     i2p.lpv = ptr;
9651c0b2f7Stbbdev     return i2p.uip;
9751c0b2f7Stbbdev }
9851c0b2f7Stbbdev 
Addrint2Ptr(UINT_PTR ptr)9951c0b2f7Stbbdev inline LPVOID Addrint2Ptr(UINT_PTR ptr)
10051c0b2f7Stbbdev {
10151c0b2f7Stbbdev     Int2Ptr i2p;
10251c0b2f7Stbbdev     i2p.uip = ptr;
10351c0b2f7Stbbdev     return i2p.lpv;
10451c0b2f7Stbbdev }
10551c0b2f7Stbbdev 
10651c0b2f7Stbbdev // Is the distance between addr1 and addr2 smaller than dist
IsInDistance(UINT_PTR addr1,UINT_PTR addr2,__int64 dist)10751c0b2f7Stbbdev inline bool IsInDistance(UINT_PTR addr1, UINT_PTR addr2, __int64 dist)
10851c0b2f7Stbbdev {
10951c0b2f7Stbbdev     __int64 diff = addr1>addr2 ? addr1-addr2 : addr2-addr1;
11051c0b2f7Stbbdev     return diff<dist;
11151c0b2f7Stbbdev }
11251c0b2f7Stbbdev 
11351c0b2f7Stbbdev /*
11451c0b2f7Stbbdev  * When inserting a probe in 64 bits process the distance between the insertion
11551c0b2f7Stbbdev  * point and the target may be bigger than 2^32. In this case we are using
11651c0b2f7Stbbdev  * indirect jump through memory where the offset to this memory location
11751c0b2f7Stbbdev  * is smaller than 2^32 and it contains the absolute address (8 bytes).
11851c0b2f7Stbbdev  *
11951c0b2f7Stbbdev  * This class is used to hold the pages used for the above trampolines.
12051c0b2f7Stbbdev  * Since this utility will be used to replace malloc functions this implementation
12151c0b2f7Stbbdev  * doesn't allocate memory dynamically.
12251c0b2f7Stbbdev  *
12351c0b2f7Stbbdev  * The struct MemoryBuffer holds the data about a page in the memory used for
12451c0b2f7Stbbdev  * replacing functions in 64-bit code where the target is too far to be replaced
12551c0b2f7Stbbdev  * with a short jump. All the calculations of m_base and m_next are in a multiple
12651c0b2f7Stbbdev  * of SIZE_OF_ADDRESS (which is 8 in Win64).
12751c0b2f7Stbbdev  */
12851c0b2f7Stbbdev class MemoryProvider {
12951c0b2f7Stbbdev private:
13051c0b2f7Stbbdev     struct MemoryBuffer {
13151c0b2f7Stbbdev         UINT_PTR m_base;    // base address of the buffer
13251c0b2f7Stbbdev         UINT_PTR m_next;    // next free location in the buffer
13351c0b2f7Stbbdev         DWORD    m_size;    // size of buffer
13451c0b2f7Stbbdev 
13551c0b2f7Stbbdev         // Default constructor
MemoryBufferMemoryProvider::MemoryBuffer13651c0b2f7Stbbdev         MemoryBuffer() : m_base(0), m_next(0), m_size(0) {}
13751c0b2f7Stbbdev 
13851c0b2f7Stbbdev         // Constructor
MemoryBufferMemoryProvider::MemoryBuffer13951c0b2f7Stbbdev         MemoryBuffer(void *base, DWORD size)
14051c0b2f7Stbbdev         {
14151c0b2f7Stbbdev             m_base = Ptr2Addrint(base);
14251c0b2f7Stbbdev             m_next = m_base;
14351c0b2f7Stbbdev             m_size = size;
14451c0b2f7Stbbdev         }
14551c0b2f7Stbbdev     };
14651c0b2f7Stbbdev 
CreateBuffer(UINT_PTR addr)14751c0b2f7Stbbdev MemoryBuffer *CreateBuffer(UINT_PTR addr)
14851c0b2f7Stbbdev     {
14951c0b2f7Stbbdev         // No more room in the pages database
15051c0b2f7Stbbdev         if (m_lastBuffer - m_pages == MAX_NUM_BUFFERS)
15151c0b2f7Stbbdev             return 0;
15251c0b2f7Stbbdev 
15351c0b2f7Stbbdev         void *newAddr = Addrint2Ptr(addr);
15451c0b2f7Stbbdev         // Get information for the region which the given address belongs to
15551c0b2f7Stbbdev         MEMORY_BASIC_INFORMATION memInfo;
15651c0b2f7Stbbdev         if (VirtualQuery(newAddr, &memInfo, sizeof(memInfo)) != sizeof(memInfo))
15751c0b2f7Stbbdev             return 0;
15851c0b2f7Stbbdev 
15951c0b2f7Stbbdev         for(;;) {
16051c0b2f7Stbbdev             // The new address to check is beyond the current region and aligned to allocation size
16151c0b2f7Stbbdev             newAddr = Addrint2Ptr( (Ptr2Addrint(memInfo.BaseAddress) + memInfo.RegionSize + m_allocSize) & ~(UINT_PTR)(m_allocSize-1) );
16251c0b2f7Stbbdev 
16351c0b2f7Stbbdev             // Check that the address is in the right distance.
16451c0b2f7Stbbdev             // VirtualAlloc can only round the address down; so it will remain in the right distance
16551c0b2f7Stbbdev             if (!IsInDistance(addr, Ptr2Addrint(newAddr), MAX_DISTANCE))
16651c0b2f7Stbbdev                 break;
16751c0b2f7Stbbdev 
16851c0b2f7Stbbdev             if (VirtualQuery(newAddr, &memInfo, sizeof(memInfo)) != sizeof(memInfo))
16951c0b2f7Stbbdev                 break;
17051c0b2f7Stbbdev 
17151c0b2f7Stbbdev             if (memInfo.State == MEM_FREE && memInfo.RegionSize >= m_allocSize)
17251c0b2f7Stbbdev             {
17351c0b2f7Stbbdev                 // Found a free region, try to allocate a page in this region
17451c0b2f7Stbbdev                 void *newPage = VirtualAlloc(newAddr, m_allocSize, MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE);
17551c0b2f7Stbbdev                 if (!newPage)
17651c0b2f7Stbbdev                     break;
17751c0b2f7Stbbdev 
17851c0b2f7Stbbdev                 // Add the new page to the pages database
17951c0b2f7Stbbdev                 MemoryBuffer *pBuff = new (m_lastBuffer) MemoryBuffer(newPage, m_allocSize);
18051c0b2f7Stbbdev                 ++m_lastBuffer;
18151c0b2f7Stbbdev                 return pBuff;
18251c0b2f7Stbbdev             }
18351c0b2f7Stbbdev         }
18451c0b2f7Stbbdev 
18551c0b2f7Stbbdev         // Failed to find a buffer in the distance
18651c0b2f7Stbbdev         return 0;
18751c0b2f7Stbbdev     }
18851c0b2f7Stbbdev 
18951c0b2f7Stbbdev public:
MemoryProvider()19051c0b2f7Stbbdev     MemoryProvider()
19151c0b2f7Stbbdev     {
19251c0b2f7Stbbdev         SYSTEM_INFO sysInfo;
19351c0b2f7Stbbdev         GetSystemInfo(&sysInfo);
19451c0b2f7Stbbdev         m_allocSize = sysInfo.dwAllocationGranularity;
19551c0b2f7Stbbdev         m_lastBuffer = &m_pages[0];
19651c0b2f7Stbbdev     }
19751c0b2f7Stbbdev 
19851c0b2f7Stbbdev     // We can't free the pages in the destructor because the trampolines
19951c0b2f7Stbbdev     // are using these memory locations and a replaced function might be called
20051c0b2f7Stbbdev     // after the destructor was called.
~MemoryProvider()20151c0b2f7Stbbdev     ~MemoryProvider()
20251c0b2f7Stbbdev     {
20351c0b2f7Stbbdev     }
20451c0b2f7Stbbdev 
20551c0b2f7Stbbdev     // Return a memory location in distance less than 2^31 from input address
GetLocation(UINT_PTR addr)20651c0b2f7Stbbdev     UINT_PTR GetLocation(UINT_PTR addr)
20751c0b2f7Stbbdev     {
20851c0b2f7Stbbdev         MemoryBuffer *pBuff = m_pages;
20951c0b2f7Stbbdev         for (; pBuff<m_lastBuffer && IsInDistance(pBuff->m_next, addr, MAX_DISTANCE); ++pBuff)
21051c0b2f7Stbbdev         {
21151c0b2f7Stbbdev             if (pBuff->m_next < pBuff->m_base + pBuff->m_size)
21251c0b2f7Stbbdev             {
21351c0b2f7Stbbdev                 UINT_PTR loc = pBuff->m_next;
21451c0b2f7Stbbdev                 pBuff->m_next += MAX_PROBE_SIZE;
21551c0b2f7Stbbdev                 return loc;
21651c0b2f7Stbbdev             }
21751c0b2f7Stbbdev         }
21851c0b2f7Stbbdev 
21951c0b2f7Stbbdev         pBuff = CreateBuffer(addr);
22051c0b2f7Stbbdev         if(!pBuff)
22151c0b2f7Stbbdev             return 0;
22251c0b2f7Stbbdev 
22351c0b2f7Stbbdev         UINT_PTR loc = pBuff->m_next;
22451c0b2f7Stbbdev         pBuff->m_next += MAX_PROBE_SIZE;
22551c0b2f7Stbbdev         return loc;
22651c0b2f7Stbbdev     }
22751c0b2f7Stbbdev 
22851c0b2f7Stbbdev private:
22951c0b2f7Stbbdev     MemoryBuffer m_pages[MAX_NUM_BUFFERS];
23051c0b2f7Stbbdev     MemoryBuffer *m_lastBuffer;
23151c0b2f7Stbbdev     DWORD m_allocSize;
23251c0b2f7Stbbdev };
23351c0b2f7Stbbdev 
23451c0b2f7Stbbdev static MemoryProvider memProvider;
23551c0b2f7Stbbdev 
// Compare opcodes from dictionary (str1) and opcodes from code (str2).
// str1 might contain '*' and '#' characters, which match any character of str2
// (they mask addresses). Only the first strlen(str1) characters of str2 are
// examined; str2 may be longer than str1.
// If str2 ends before str1 does, the pattern cannot be confirmed and the strings
// are reported as not matching; str2 is never read past its terminating zero.
// RETURN: 0 if opcodes did not match, 1 on success
size_t compareStrings( const char *str1, const char *str2 )
{
   for (size_t i=0; str1[i]!=0; i++){
       // The code string is shorter than the pattern
       if( str2[i]==0 ) return 0;
       if( str1[i]!='*' && str1[i]!='#' && str1[i]!=str2[i] ) return 0;
   }
   return 1;
}
24651c0b2f7Stbbdev 
24751c0b2f7Stbbdev // Check function prologue with known prologues from the dictionary
24851c0b2f7Stbbdev // opcodes - dictionary
24951c0b2f7Stbbdev // inpAddr - pointer to function prologue
25051c0b2f7Stbbdev // Dictionary contains opcodes for several full asm instructions
25151c0b2f7Stbbdev // + one opcode byte for the next asm instruction for safe address processing
25251c0b2f7Stbbdev // RETURN: 1 + the index of the matched pattern, or 0 if no match found.
CheckOpcodes(const char ** opcodes,void * inpAddr,bool abortOnError,const FunctionInfo * functionInfo=nullptr)25357f524caSIlya Isaev static UINT CheckOpcodes( const char ** opcodes, void *inpAddr, bool abortOnError, const FunctionInfo* functionInfo = nullptr)
25451c0b2f7Stbbdev {
25551c0b2f7Stbbdev     static size_t opcodesStringsCount = 0;
25651c0b2f7Stbbdev     static size_t maxOpcodesLength = 0;
25751c0b2f7Stbbdev     static size_t opcodes_pointer = (size_t)opcodes;
25851c0b2f7Stbbdev     char opcodeString[2*MAX_PATTERN_SIZE+1];
25951c0b2f7Stbbdev     size_t i;
26051c0b2f7Stbbdev     size_t result = 0;
26151c0b2f7Stbbdev 
26251c0b2f7Stbbdev     // Get the values for static variables
26351c0b2f7Stbbdev     // max length and number of patterns
26451c0b2f7Stbbdev     if( !opcodesStringsCount || opcodes_pointer != (size_t)opcodes ){
26557f524caSIlya Isaev         while( *(opcodes + opcodesStringsCount)!= nullptr ){
26651c0b2f7Stbbdev             if( (i=strlen(*(opcodes + opcodesStringsCount))) > maxOpcodesLength )
26751c0b2f7Stbbdev                 maxOpcodesLength = i;
26851c0b2f7Stbbdev             opcodesStringsCount++;
26951c0b2f7Stbbdev         }
27051c0b2f7Stbbdev         opcodes_pointer = (size_t)opcodes;
27151c0b2f7Stbbdev         __TBB_ASSERT( maxOpcodesLength/2 <= MAX_PATTERN_SIZE, "Pattern exceeded the limit of 28 opcodes/56 symbols" );
27251c0b2f7Stbbdev     }
27351c0b2f7Stbbdev 
27451c0b2f7Stbbdev     // Translate prologue opcodes to string format to compare
27551c0b2f7Stbbdev     for( i=0; i<maxOpcodesLength/2 && i<MAX_PATTERN_SIZE; ++i ){
27651c0b2f7Stbbdev         sprintf( opcodeString + 2*i, "%.2X", *((unsigned char*)inpAddr+i) );
27751c0b2f7Stbbdev     }
27851c0b2f7Stbbdev     opcodeString[2*i] = 0;
27951c0b2f7Stbbdev 
28051c0b2f7Stbbdev     // Compare translated opcodes with patterns
28151c0b2f7Stbbdev     for( UINT idx=0; idx<opcodesStringsCount; ++idx ){
28251c0b2f7Stbbdev         result = compareStrings( opcodes[idx],opcodeString );
28351c0b2f7Stbbdev         if( result ) {
28451c0b2f7Stbbdev             if (functionInfo) {
28551c0b2f7Stbbdev                 Log::record(*functionInfo, opcodeString, /*status*/ true);
28651c0b2f7Stbbdev             }
28751c0b2f7Stbbdev             return idx + 1; // avoid 0 which indicates a failure
28851c0b2f7Stbbdev         }
28951c0b2f7Stbbdev     }
29051c0b2f7Stbbdev     if (functionInfo) {
29151c0b2f7Stbbdev         Log::record(*functionInfo, opcodeString, /*status*/ false);
29251c0b2f7Stbbdev     }
29351c0b2f7Stbbdev     if (abortOnError) {
29451c0b2f7Stbbdev         // Impossibility to find opcodes in the dictionary is a serious issue,
29551c0b2f7Stbbdev         // as if we unable to call original function, leak or crash is expected result.
29651c0b2f7Stbbdev         __TBB_ASSERT_RELEASE( false, "CheckOpcodes failed" );
29751c0b2f7Stbbdev     }
29851c0b2f7Stbbdev     return 0;
29951c0b2f7Stbbdev }
30051c0b2f7Stbbdev 
30151c0b2f7Stbbdev // Modify offsets in original code after moving it to a trampoline.
30251c0b2f7Stbbdev // We do not have more than one offset to correct in existing opcode patterns.
CorrectOffset(UINT_PTR address,const char * pattern,UINT distance)30351c0b2f7Stbbdev static void CorrectOffset( UINT_PTR address, const char* pattern, UINT distance )
30451c0b2f7Stbbdev {
30551c0b2f7Stbbdev     const char* pos = strstr(pattern, "#*******");
30651c0b2f7Stbbdev     if( pos ) {
30751c0b2f7Stbbdev         address += (pos - pattern)/2; // compute the offset position
30851c0b2f7Stbbdev         UINT value;
30951c0b2f7Stbbdev         // UINT assignment is not used to avoid potential alignment issues
31051c0b2f7Stbbdev         memcpy(&value, Addrint2Ptr(address), sizeof(value));
31151c0b2f7Stbbdev         value += distance;
31251c0b2f7Stbbdev         memcpy(Addrint2Ptr(address), &value, sizeof(value));
31351c0b2f7Stbbdev     }
31451c0b2f7Stbbdev }
31551c0b2f7Stbbdev 
31651c0b2f7Stbbdev // Insert jump relative instruction to the input address
31751c0b2f7Stbbdev // RETURN: the size of the trampoline or 0 on failure
InsertTrampoline32(void * inpAddr,void * targetAddr,const char * pattern,void ** storedAddr)31851c0b2f7Stbbdev static DWORD InsertTrampoline32(void *inpAddr, void *targetAddr, const char* pattern, void** storedAddr)
31951c0b2f7Stbbdev {
32051c0b2f7Stbbdev     size_t bytesToMove = SIZE_OF_RELJUMP;
32151c0b2f7Stbbdev     UINT_PTR srcAddr = Ptr2Addrint(inpAddr);
32251c0b2f7Stbbdev     UINT_PTR tgtAddr = Ptr2Addrint(targetAddr);
32351c0b2f7Stbbdev     // Check that the target fits in 32 bits
32451c0b2f7Stbbdev     if (!IsInDistance(srcAddr, tgtAddr, MAX_DISTANCE))
32551c0b2f7Stbbdev         return 0;
32651c0b2f7Stbbdev 
32751c0b2f7Stbbdev     UINT_PTR offset;
32851c0b2f7Stbbdev     UINT offset32;
32951c0b2f7Stbbdev     UCHAR *codePtr = (UCHAR *)inpAddr;
33051c0b2f7Stbbdev 
33151c0b2f7Stbbdev     if ( storedAddr ){ // If requested, store original function code
33251c0b2f7Stbbdev         bytesToMove = strlen(pattern)/2-1; // The last byte matching the pattern must not be copied
33351c0b2f7Stbbdev         __TBB_ASSERT_RELEASE( bytesToMove >= SIZE_OF_RELJUMP, "Incorrect bytecode pattern?" );
33451c0b2f7Stbbdev         UINT_PTR trampAddr = memProvider.GetLocation(srcAddr);
33551c0b2f7Stbbdev         if (!trampAddr)
33651c0b2f7Stbbdev             return 0;
33751c0b2f7Stbbdev         *storedAddr = Addrint2Ptr(trampAddr);
33851c0b2f7Stbbdev         // Set 'executable' flag for original instructions in the new place
33951c0b2f7Stbbdev         DWORD pageFlags = PAGE_EXECUTE_READWRITE;
34051c0b2f7Stbbdev         if (!VirtualProtect(*storedAddr, MAX_PROBE_SIZE, pageFlags, &pageFlags)) return 0;
34151c0b2f7Stbbdev         // Copy original instructions to the new place
34251c0b2f7Stbbdev         memcpy(*storedAddr, codePtr, bytesToMove);
34351c0b2f7Stbbdev         offset = srcAddr - trampAddr;
34451c0b2f7Stbbdev         offset32 = (UINT)(offset & 0xFFFFFFFF);
34551c0b2f7Stbbdev         CorrectOffset( trampAddr, pattern, offset32 );
34651c0b2f7Stbbdev         // Set jump to the code after replacement
34751c0b2f7Stbbdev         offset32 -= SIZE_OF_RELJUMP;
34851c0b2f7Stbbdev         *(UCHAR*)(trampAddr+bytesToMove) = 0xE9;
34951c0b2f7Stbbdev         memcpy((UCHAR*)(trampAddr+bytesToMove+1), &offset32, sizeof(offset32));
35051c0b2f7Stbbdev     }
35151c0b2f7Stbbdev 
35251c0b2f7Stbbdev     // The following will work correctly even if srcAddr>tgtAddr, as long as
35351c0b2f7Stbbdev     // address difference is less than 2^31, which is guaranteed by IsInDistance.
35451c0b2f7Stbbdev     offset = tgtAddr - srcAddr - SIZE_OF_RELJUMP;
35551c0b2f7Stbbdev     offset32 = (UINT)(offset & 0xFFFFFFFF);
35651c0b2f7Stbbdev     // Insert the jump to the new code
35751c0b2f7Stbbdev     *codePtr = 0xE9;
35851c0b2f7Stbbdev     memcpy(codePtr+1, &offset32, sizeof(offset32));
35951c0b2f7Stbbdev 
36051c0b2f7Stbbdev     // Fill the rest with NOPs to correctly see disassembler of old code in debugger.
36151c0b2f7Stbbdev     for( unsigned i=SIZE_OF_RELJUMP; i<bytesToMove; i++ ){
36251c0b2f7Stbbdev         *(codePtr+i) = 0x90;
36351c0b2f7Stbbdev     }
36451c0b2f7Stbbdev 
36551c0b2f7Stbbdev     return SIZE_OF_RELJUMP;
36651c0b2f7Stbbdev }
36751c0b2f7Stbbdev 
36851c0b2f7Stbbdev // This function is called when the offset doesn't fit in 32 bits
36951c0b2f7Stbbdev // 1  Find and allocate a page in the small distance (<2^31) from input address
37051c0b2f7Stbbdev // 2  Put jump RIP relative indirect through the address in the close page
37151c0b2f7Stbbdev // 3  Put the absolute address of the target in the allocated location
37251c0b2f7Stbbdev // RETURN: the size of the trampoline or 0 on failure
InsertTrampoline64(void * inpAddr,void * targetAddr,const char * pattern,void ** storedAddr)37351c0b2f7Stbbdev static DWORD InsertTrampoline64(void *inpAddr, void *targetAddr, const char* pattern, void** storedAddr)
37451c0b2f7Stbbdev {
37551c0b2f7Stbbdev     size_t bytesToMove = SIZE_OF_INDJUMP;
37651c0b2f7Stbbdev 
37751c0b2f7Stbbdev     UINT_PTR srcAddr = Ptr2Addrint(inpAddr);
37851c0b2f7Stbbdev     UINT_PTR tgtAddr = Ptr2Addrint(targetAddr);
37951c0b2f7Stbbdev 
38051c0b2f7Stbbdev     // Get a location close to the source address
38151c0b2f7Stbbdev     UINT_PTR location = memProvider.GetLocation(srcAddr);
38251c0b2f7Stbbdev     if (!location)
38351c0b2f7Stbbdev         return 0;
38451c0b2f7Stbbdev 
38551c0b2f7Stbbdev     UINT_PTR offset;
38651c0b2f7Stbbdev     UINT offset32;
38751c0b2f7Stbbdev     UCHAR *codePtr = (UCHAR *)inpAddr;
38851c0b2f7Stbbdev 
38951c0b2f7Stbbdev     // Fill the location
39051c0b2f7Stbbdev     UINT_PTR *locPtr = (UINT_PTR *)Addrint2Ptr(location);
39151c0b2f7Stbbdev     *locPtr = tgtAddr;
39251c0b2f7Stbbdev 
39351c0b2f7Stbbdev     if ( storedAddr ){ // If requested, store original function code
39451c0b2f7Stbbdev         bytesToMove = strlen(pattern)/2-1; // The last byte matching the pattern must not be copied
39551c0b2f7Stbbdev         __TBB_ASSERT_RELEASE( bytesToMove >= SIZE_OF_INDJUMP, "Incorrect bytecode pattern?" );
39651c0b2f7Stbbdev         UINT_PTR trampAddr = memProvider.GetLocation(srcAddr);
39751c0b2f7Stbbdev         if (!trampAddr)
39851c0b2f7Stbbdev             return 0;
39951c0b2f7Stbbdev         *storedAddr = Addrint2Ptr(trampAddr);
40051c0b2f7Stbbdev         // Set 'executable' flag for original instructions in the new place
40151c0b2f7Stbbdev         DWORD pageFlags = PAGE_EXECUTE_READWRITE;
40251c0b2f7Stbbdev         if (!VirtualProtect(*storedAddr, MAX_PROBE_SIZE, pageFlags, &pageFlags)) return 0;
40351c0b2f7Stbbdev         // Copy original instructions to the new place
40451c0b2f7Stbbdev         memcpy(*storedAddr, codePtr, bytesToMove);
40551c0b2f7Stbbdev         offset = srcAddr - trampAddr;
40651c0b2f7Stbbdev         offset32 = (UINT)(offset & 0xFFFFFFFF);
40751c0b2f7Stbbdev         CorrectOffset( trampAddr, pattern, offset32 );
40851c0b2f7Stbbdev         // Set jump to the code after replacement. It is within the distance of relative jump!
40951c0b2f7Stbbdev         offset32 -= SIZE_OF_RELJUMP;
41051c0b2f7Stbbdev         *(UCHAR*)(trampAddr+bytesToMove) = 0xE9;
41151c0b2f7Stbbdev         memcpy((UCHAR*)(trampAddr+bytesToMove+1), &offset32, sizeof(offset32));
41251c0b2f7Stbbdev     }
41351c0b2f7Stbbdev 
41451c0b2f7Stbbdev     // Fill the buffer
41551c0b2f7Stbbdev     offset = location - srcAddr - SIZE_OF_INDJUMP;
41651c0b2f7Stbbdev     offset32 = (UINT)(offset & 0xFFFFFFFF);
41751c0b2f7Stbbdev     *(codePtr) = 0xFF;
41851c0b2f7Stbbdev     *(codePtr+1) = 0x25;
41951c0b2f7Stbbdev     memcpy(codePtr+2, &offset32, sizeof(offset32));
42051c0b2f7Stbbdev 
42151c0b2f7Stbbdev     // Fill the rest with NOPs to correctly see disassembler of old code in debugger.
42251c0b2f7Stbbdev     for( unsigned i=SIZE_OF_INDJUMP; i<bytesToMove; i++ ){
42351c0b2f7Stbbdev         *(codePtr+i) = 0x90;
42451c0b2f7Stbbdev     }
42551c0b2f7Stbbdev 
42651c0b2f7Stbbdev     return SIZE_OF_INDJUMP;
42751c0b2f7Stbbdev }
42851c0b2f7Stbbdev 
42951c0b2f7Stbbdev // Insert a jump instruction in the inpAddr to the targetAddr
43051c0b2f7Stbbdev // 1. Get the memory protection of the page containing the input address
43151c0b2f7Stbbdev // 2. Change the memory protection to writable
43251c0b2f7Stbbdev // 3. Call InsertTrampoline32 or InsertTrampoline64
43351c0b2f7Stbbdev // 4. Restore memory protection
43451c0b2f7Stbbdev // RETURN: FALSE on failure, TRUE on success
InsertTrampoline(void * inpAddr,void * targetAddr,const char ** opcodes,void ** origFunc)43551c0b2f7Stbbdev static bool InsertTrampoline(void *inpAddr, void *targetAddr, const char ** opcodes, void** origFunc)
43651c0b2f7Stbbdev {
43751c0b2f7Stbbdev     DWORD probeSize;
43851c0b2f7Stbbdev     // Change page protection to EXECUTE+WRITE
43951c0b2f7Stbbdev     DWORD origProt = 0;
44051c0b2f7Stbbdev     if (!VirtualProtect(inpAddr, MAX_PROBE_SIZE, PAGE_EXECUTE_WRITECOPY, &origProt))
44151c0b2f7Stbbdev         return FALSE;
44251c0b2f7Stbbdev 
44357f524caSIlya Isaev     const char* pattern = nullptr;
44451c0b2f7Stbbdev     if ( origFunc ){ // Need to store original function code
44551c0b2f7Stbbdev         UCHAR * const codePtr = (UCHAR *)inpAddr;
44651c0b2f7Stbbdev         if ( *codePtr == 0xE9 ){ // JMP relative instruction
44751c0b2f7Stbbdev             // For the special case when a system function consists of a single near jump,
44851c0b2f7Stbbdev             // instead of moving it somewhere we use the target of the jump as the original function.
44951c0b2f7Stbbdev             unsigned offsetInJmp = *(unsigned*)(codePtr + 1);
45051c0b2f7Stbbdev             *origFunc = (void*)(Ptr2Addrint(inpAddr) + offsetInJmp + SIZE_OF_RELJUMP);
45157f524caSIlya Isaev             origFunc = nullptr; // now it must be ignored by InsertTrampoline32/64
45251c0b2f7Stbbdev         } else {
45351c0b2f7Stbbdev             // find the right opcode pattern
45451c0b2f7Stbbdev             UINT opcodeIdx = CheckOpcodes( opcodes, inpAddr, /*abortOnError=*/true );
45551c0b2f7Stbbdev             __TBB_ASSERT( opcodeIdx > 0, "abortOnError ignored in CheckOpcodes?" );
45651c0b2f7Stbbdev             pattern = opcodes[opcodeIdx-1];  // -1 compensates for +1 in CheckOpcodes
45751c0b2f7Stbbdev         }
45851c0b2f7Stbbdev     }
45951c0b2f7Stbbdev 
46051c0b2f7Stbbdev     probeSize = InsertTrampoline32(inpAddr, targetAddr, pattern, origFunc);
46151c0b2f7Stbbdev     if (!probeSize)
46251c0b2f7Stbbdev         probeSize = InsertTrampoline64(inpAddr, targetAddr, pattern, origFunc);
46351c0b2f7Stbbdev 
46451c0b2f7Stbbdev     // Restore original protection
46551c0b2f7Stbbdev     VirtualProtect(inpAddr, MAX_PROBE_SIZE, origProt, &origProt);
46651c0b2f7Stbbdev 
46751c0b2f7Stbbdev     if (!probeSize)
46851c0b2f7Stbbdev         return FALSE;
46951c0b2f7Stbbdev 
47051c0b2f7Stbbdev     FlushInstructionCache(GetCurrentProcess(), inpAddr, probeSize);
47151c0b2f7Stbbdev     FlushInstructionCache(GetCurrentProcess(), origFunc, probeSize);
47251c0b2f7Stbbdev 
47351c0b2f7Stbbdev     return TRUE;
47451c0b2f7Stbbdev }
47551c0b2f7Stbbdev 
47651c0b2f7Stbbdev // Routine to replace the functions
47751c0b2f7Stbbdev // TODO: replace opcodesNumber with opcodes and opcodes number to check if we replace right code.
ReplaceFunctionA(const char * dllName,const char * funcName,FUNCPTR newFunc,const char ** opcodes,FUNCPTR * origFunc)47851c0b2f7Stbbdev FRR_TYPE ReplaceFunctionA(const char *dllName, const char *funcName, FUNCPTR newFunc, const char ** opcodes, FUNCPTR* origFunc)
47951c0b2f7Stbbdev {
48051c0b2f7Stbbdev     // Cache the results of the last search for the module
48151c0b2f7Stbbdev     // Assume that there was no DLL unload between
48251c0b2f7Stbbdev     static char cachedName[MAX_PATH+1];
48351c0b2f7Stbbdev     static HMODULE cachedHM = 0;
48451c0b2f7Stbbdev 
48551c0b2f7Stbbdev     if (!dllName || !*dllName)
48651c0b2f7Stbbdev         return FRR_NODLL;
48751c0b2f7Stbbdev 
48851c0b2f7Stbbdev     if (!cachedHM || strncmp(dllName, cachedName, MAX_PATH) != 0)
48951c0b2f7Stbbdev     {
49051c0b2f7Stbbdev         // Find the module handle for the input dll
49151c0b2f7Stbbdev         HMODULE hModule = GetModuleHandleA(dllName);
49251c0b2f7Stbbdev         if (hModule == 0)
49351c0b2f7Stbbdev         {
49451c0b2f7Stbbdev             // Couldn't find the module with the input name
49551c0b2f7Stbbdev             cachedHM = 0;
49651c0b2f7Stbbdev             return FRR_NODLL;
49751c0b2f7Stbbdev         }
49851c0b2f7Stbbdev 
49951c0b2f7Stbbdev         cachedHM = hModule;
50051c0b2f7Stbbdev         strncpy(cachedName, dllName, MAX_PATH);
50151c0b2f7Stbbdev     }
50251c0b2f7Stbbdev 
50351c0b2f7Stbbdev     FARPROC inpFunc = GetProcAddress(cachedHM, funcName);
50451c0b2f7Stbbdev     if (inpFunc == 0)
50551c0b2f7Stbbdev     {
50651c0b2f7Stbbdev         // Function was not found
50751c0b2f7Stbbdev         return FRR_NOFUNC;
50851c0b2f7Stbbdev     }
50951c0b2f7Stbbdev 
51051c0b2f7Stbbdev     if (!InsertTrampoline((void*)inpFunc, (void*)newFunc, opcodes, (void**)origFunc)){
51151c0b2f7Stbbdev         // Failed to insert the trampoline to the target address
51251c0b2f7Stbbdev         return FRR_FAILED;
51351c0b2f7Stbbdev     }
51451c0b2f7Stbbdev 
51551c0b2f7Stbbdev     return FRR_OK;
51651c0b2f7Stbbdev }
51751c0b2f7Stbbdev 
ReplaceFunctionW(const wchar_t * dllName,const char * funcName,FUNCPTR newFunc,const char ** opcodes,FUNCPTR * origFunc)51851c0b2f7Stbbdev FRR_TYPE ReplaceFunctionW(const wchar_t *dllName, const char *funcName, FUNCPTR newFunc, const char ** opcodes, FUNCPTR* origFunc)
51951c0b2f7Stbbdev {
52051c0b2f7Stbbdev     // Cache the results of the last search for the module
52151c0b2f7Stbbdev     // Assume that there was no DLL unload between
52251c0b2f7Stbbdev     static wchar_t cachedName[MAX_PATH+1];
52351c0b2f7Stbbdev     static HMODULE cachedHM = 0;
52451c0b2f7Stbbdev 
52551c0b2f7Stbbdev     if (!dllName || !*dllName)
52651c0b2f7Stbbdev         return FRR_NODLL;
52751c0b2f7Stbbdev 
52851c0b2f7Stbbdev     if (!cachedHM || wcsncmp(dllName, cachedName, MAX_PATH) != 0)
52951c0b2f7Stbbdev     {
53051c0b2f7Stbbdev         // Find the module handle for the input dll
53151c0b2f7Stbbdev         HMODULE hModule = GetModuleHandleW(dllName);
53251c0b2f7Stbbdev         if (hModule == 0)
53351c0b2f7Stbbdev         {
53451c0b2f7Stbbdev             // Couldn't find the module with the input name
53551c0b2f7Stbbdev             cachedHM = 0;
53651c0b2f7Stbbdev             return FRR_NODLL;
53751c0b2f7Stbbdev         }
53851c0b2f7Stbbdev 
53951c0b2f7Stbbdev         cachedHM = hModule;
54051c0b2f7Stbbdev         wcsncpy(cachedName, dllName, MAX_PATH);
54151c0b2f7Stbbdev     }
54251c0b2f7Stbbdev 
54351c0b2f7Stbbdev     FARPROC inpFunc = GetProcAddress(cachedHM, funcName);
54451c0b2f7Stbbdev     if (inpFunc == 0)
54551c0b2f7Stbbdev     {
54651c0b2f7Stbbdev         // Function was not found
54751c0b2f7Stbbdev         return FRR_NOFUNC;
54851c0b2f7Stbbdev     }
54951c0b2f7Stbbdev 
55051c0b2f7Stbbdev     if (!InsertTrampoline((void*)inpFunc, (void*)newFunc, opcodes, (void**)origFunc)){
55151c0b2f7Stbbdev         // Failed to insert the trampoline to the target address
55251c0b2f7Stbbdev         return FRR_FAILED;
55351c0b2f7Stbbdev     }
55451c0b2f7Stbbdev 
55551c0b2f7Stbbdev     return FRR_OK;
55651c0b2f7Stbbdev }
55751c0b2f7Stbbdev 
IsPrologueKnown(const char * dllName,const char * funcName,const char ** opcodes,HMODULE module)55851c0b2f7Stbbdev bool IsPrologueKnown(const char* dllName, const char *funcName, const char **opcodes, HMODULE module)
55951c0b2f7Stbbdev {
56051c0b2f7Stbbdev     FARPROC inpFunc = GetProcAddress(module, funcName);
56151c0b2f7Stbbdev     FunctionInfo functionInfo = { funcName, dllName };
56251c0b2f7Stbbdev 
56351c0b2f7Stbbdev     if (!inpFunc) {
56451c0b2f7Stbbdev         Log::record(functionInfo, "unknown", /*status*/ false);
56551c0b2f7Stbbdev         return false;
56651c0b2f7Stbbdev     }
56751c0b2f7Stbbdev 
56851c0b2f7Stbbdev     return CheckOpcodes( opcodes, (void*)inpFunc, /*abortOnError=*/false, &functionInfo) != 0;
56951c0b2f7Stbbdev }
57051c0b2f7Stbbdev 
57151c0b2f7Stbbdev // Public Windows API
TBB_malloc_replacement_log(char *** function_replacement_log_ptr)57251c0b2f7Stbbdev extern "C" __declspec(dllexport) int TBB_malloc_replacement_log(char *** function_replacement_log_ptr)
57351c0b2f7Stbbdev {
57457f524caSIlya Isaev     if (function_replacement_log_ptr != nullptr) {
57551c0b2f7Stbbdev         *function_replacement_log_ptr = Log::records;
57651c0b2f7Stbbdev     }
57751c0b2f7Stbbdev 
57851c0b2f7Stbbdev     // If we have no logs -> return false status
57957f524caSIlya Isaev     return Log::replacement_status && Log::records[0] != nullptr ? 0 : -1;
58051c0b2f7Stbbdev }
58151c0b2f7Stbbdev 
58251c0b2f7Stbbdev #endif /* !__TBB_WIN8UI_SUPPORT && defined(_WIN32) */
583