151c0b2f7Stbbdev /* 251c0b2f7Stbbdev Copyright (c) 2005-2020 Intel Corporation 351c0b2f7Stbbdev 451c0b2f7Stbbdev Licensed under the Apache License, Version 2.0 (the "License"); 551c0b2f7Stbbdev you may not use this file except in compliance with the License. 651c0b2f7Stbbdev You may obtain a copy of the License at 751c0b2f7Stbbdev 851c0b2f7Stbbdev http://www.apache.org/licenses/LICENSE-2.0 951c0b2f7Stbbdev 1051c0b2f7Stbbdev Unless required by applicable law or agreed to in writing, software 1151c0b2f7Stbbdev distributed under the License is distributed on an "AS IS" BASIS, 1251c0b2f7Stbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1351c0b2f7Stbbdev See the License for the specific language governing permissions and 1451c0b2f7Stbbdev limitations under the License. 1551c0b2f7Stbbdev */ 1651c0b2f7Stbbdev 17*49e08aacStbbdev #include "oneapi/tbb/detail/_config.h" 18*49e08aacStbbdev #include "oneapi/tbb/detail/_assert.h" 1951c0b2f7Stbbdev #include "../tbb/assert_impl.h" 2051c0b2f7Stbbdev 2151c0b2f7Stbbdev #if !__TBB_WIN8UI_SUPPORT && defined(_WIN32) 2251c0b2f7Stbbdev 2351c0b2f7Stbbdev #ifndef _CRT_SECURE_NO_DEPRECATE 2451c0b2f7Stbbdev #define _CRT_SECURE_NO_DEPRECATE 1 2551c0b2f7Stbbdev #endif 2651c0b2f7Stbbdev 2751c0b2f7Stbbdev // no standard-conforming implementation of snprintf prior to VS 2015 2851c0b2f7Stbbdev #if !defined(_MSC_VER) || _MSC_VER>=1900 2951c0b2f7Stbbdev #define LOG_PRINT(s, n, format, ...) snprintf(s, n, format, __VA_ARGS__) 3051c0b2f7Stbbdev #else 3151c0b2f7Stbbdev #define LOG_PRINT(s, n, format, ...) _snprintf_s(s, n, _TRUNCATE, format, __VA_ARGS__) 3251c0b2f7Stbbdev #endif 3351c0b2f7Stbbdev 3451c0b2f7Stbbdev #include <windows.h> 3551c0b2f7Stbbdev #include <new> 3651c0b2f7Stbbdev #include <stdio.h> 3751c0b2f7Stbbdev #include <string.h> 3851c0b2f7Stbbdev 3951c0b2f7Stbbdev #include "function_replacement.h" 4051c0b2f7Stbbdev 4151c0b2f7Stbbdev // The information about a standard memory allocation function for the replacement log 4251c0b2f7Stbbdev struct FunctionInfo { 4351c0b2f7Stbbdev const char* funcName; 4451c0b2f7Stbbdev const char* dllName; 4551c0b2f7Stbbdev }; 4651c0b2f7Stbbdev 4751c0b2f7Stbbdev // Namespace that processes and manages the output of records to the Log journal 4851c0b2f7Stbbdev // that will be provided to user by TBB_malloc_replacement_log() 4951c0b2f7Stbbdev namespace Log { 5051c0b2f7Stbbdev // Value of RECORDS_COUNT is set due to the fact that we maximally 5151c0b2f7Stbbdev // scan 8 modules, and in every module we can swap 6 opcodes. (rounded to 8) 5251c0b2f7Stbbdev static const unsigned RECORDS_COUNT = 8 * 8; 5351c0b2f7Stbbdev static const unsigned RECORD_LENGTH = MAX_PATH; 5451c0b2f7Stbbdev 5551c0b2f7Stbbdev // Need to add 1 to count of records, because last record must be always NULL 5651c0b2f7Stbbdev static char *records[RECORDS_COUNT + 1]; 5751c0b2f7Stbbdev static bool replacement_status = true; 5851c0b2f7Stbbdev 5951c0b2f7Stbbdev // Internal counter that contains number of next string for record 6051c0b2f7Stbbdev static unsigned record_number = 0; 6151c0b2f7Stbbdev 6251c0b2f7Stbbdev // Function that writes info about (not)found opcodes to the Log journal 6351c0b2f7Stbbdev // functionInfo - information about a standard memory allocation function for the replacement log 6451c0b2f7Stbbdev // opcodeString - string, that contain byte code of this function 6551c0b2f7Stbbdev // status - information about function replacement status 6651c0b2f7Stbbdev static void record(FunctionInfo functionInfo, const char * opcodeString, bool status) { 6751c0b2f7Stbbdev __TBB_ASSERT(functionInfo.dllName, "Empty DLL name value"); 6851c0b2f7Stbbdev __TBB_ASSERT(functionInfo.funcName, "Empty function name value"); 6951c0b2f7Stbbdev __TBB_ASSERT(opcodeString, "Empty opcode"); 7051c0b2f7Stbbdev __TBB_ASSERT(record_number <= RECORDS_COUNT, "Incorrect record number"); 7151c0b2f7Stbbdev 7251c0b2f7Stbbdev //If some replacement failed -> set status to false 7351c0b2f7Stbbdev replacement_status &= status; 7451c0b2f7Stbbdev 7551c0b2f7Stbbdev // If we reach the end of the log, write this message to the last line 7651c0b2f7Stbbdev if (record_number == RECORDS_COUNT) { 7751c0b2f7Stbbdev // %s - workaround to fix empty variable argument parsing behavior in GCC 7851c0b2f7Stbbdev LOG_PRINT(records[RECORDS_COUNT - 1], RECORD_LENGTH, "%s", "Log was truncated."); 7951c0b2f7Stbbdev return; 8051c0b2f7Stbbdev } 8151c0b2f7Stbbdev 8251c0b2f7Stbbdev char* entry = (char*)HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, RECORD_LENGTH); 8351c0b2f7Stbbdev __TBB_ASSERT(entry, "Invalid memory was returned"); 8451c0b2f7Stbbdev 8551c0b2f7Stbbdev LOG_PRINT(entry, RECORD_LENGTH, "%s: %s (%s), byte pattern: <%s>", 8651c0b2f7Stbbdev status ? "Success" : "Fail", functionInfo.funcName, functionInfo.dllName, opcodeString); 8751c0b2f7Stbbdev 8851c0b2f7Stbbdev records[record_number++] = entry; 8951c0b2f7Stbbdev } 9051c0b2f7Stbbdev }; 9151c0b2f7Stbbdev 9251c0b2f7Stbbdev inline UINT_PTR Ptr2Addrint(LPVOID ptr) 9351c0b2f7Stbbdev { 9451c0b2f7Stbbdev Int2Ptr i2p; 9551c0b2f7Stbbdev i2p.lpv = ptr; 9651c0b2f7Stbbdev return i2p.uip; 9751c0b2f7Stbbdev } 9851c0b2f7Stbbdev 9951c0b2f7Stbbdev inline LPVOID Addrint2Ptr(UINT_PTR ptr) 10051c0b2f7Stbbdev { 10151c0b2f7Stbbdev Int2Ptr i2p; 10251c0b2f7Stbbdev i2p.uip = ptr; 10351c0b2f7Stbbdev return i2p.lpv; 10451c0b2f7Stbbdev } 10551c0b2f7Stbbdev 10651c0b2f7Stbbdev // Is the distance between addr1 and addr2 smaller than dist 10751c0b2f7Stbbdev inline bool IsInDistance(UINT_PTR addr1, UINT_PTR addr2, __int64 dist) 10851c0b2f7Stbbdev { 10951c0b2f7Stbbdev __int64 diff = addr1>addr2 ? addr1-addr2 : addr2-addr1; 11051c0b2f7Stbbdev return diff<dist; 11151c0b2f7Stbbdev } 11251c0b2f7Stbbdev 11351c0b2f7Stbbdev /* 11451c0b2f7Stbbdev * When inserting a probe in 64 bits process the distance between the insertion 11551c0b2f7Stbbdev * point and the target may be bigger than 2^32. In this case we are using 11651c0b2f7Stbbdev * indirect jump through memory where the offset to this memory location 11751c0b2f7Stbbdev * is smaller than 2^32 and it contains the absolute address (8 bytes). 11851c0b2f7Stbbdev * 11951c0b2f7Stbbdev * This class is used to hold the pages used for the above trampolines. 12051c0b2f7Stbbdev * Since this utility will be used to replace malloc functions this implementation 12151c0b2f7Stbbdev * doesn't allocate memory dynamically. 12251c0b2f7Stbbdev * 12351c0b2f7Stbbdev * The struct MemoryBuffer holds the data about a page in the memory used for 12451c0b2f7Stbbdev * replacing functions in 64-bit code where the target is too far to be replaced 12551c0b2f7Stbbdev * with a short jump. All the calculations of m_base and m_next are in a multiple 12651c0b2f7Stbbdev * of SIZE_OF_ADDRESS (which is 8 in Win64). 12751c0b2f7Stbbdev */ 12851c0b2f7Stbbdev class MemoryProvider { 12951c0b2f7Stbbdev private: 13051c0b2f7Stbbdev struct MemoryBuffer { 13151c0b2f7Stbbdev UINT_PTR m_base; // base address of the buffer 13251c0b2f7Stbbdev UINT_PTR m_next; // next free location in the buffer 13351c0b2f7Stbbdev DWORD m_size; // size of buffer 13451c0b2f7Stbbdev 13551c0b2f7Stbbdev // Default constructor 13651c0b2f7Stbbdev MemoryBuffer() : m_base(0), m_next(0), m_size(0) {} 13751c0b2f7Stbbdev 13851c0b2f7Stbbdev // Constructor 13951c0b2f7Stbbdev MemoryBuffer(void *base, DWORD size) 14051c0b2f7Stbbdev { 14151c0b2f7Stbbdev m_base = Ptr2Addrint(base); 14251c0b2f7Stbbdev m_next = m_base; 14351c0b2f7Stbbdev m_size = size; 14451c0b2f7Stbbdev } 14551c0b2f7Stbbdev }; 14651c0b2f7Stbbdev 14751c0b2f7Stbbdev MemoryBuffer *CreateBuffer(UINT_PTR addr) 14851c0b2f7Stbbdev { 14951c0b2f7Stbbdev // No more room in the pages database 15051c0b2f7Stbbdev if (m_lastBuffer - m_pages == MAX_NUM_BUFFERS) 15151c0b2f7Stbbdev return 0; 15251c0b2f7Stbbdev 15351c0b2f7Stbbdev void *newAddr = Addrint2Ptr(addr); 15451c0b2f7Stbbdev // Get information for the region which the given address belongs to 15551c0b2f7Stbbdev MEMORY_BASIC_INFORMATION memInfo; 15651c0b2f7Stbbdev if (VirtualQuery(newAddr, &memInfo, sizeof(memInfo)) != sizeof(memInfo)) 15751c0b2f7Stbbdev return 0; 15851c0b2f7Stbbdev 15951c0b2f7Stbbdev for(;;) { 16051c0b2f7Stbbdev // The new address to check is beyond the current region and aligned to allocation size 16151c0b2f7Stbbdev newAddr = Addrint2Ptr( (Ptr2Addrint(memInfo.BaseAddress) + memInfo.RegionSize + m_allocSize) & ~(UINT_PTR)(m_allocSize-1) ); 16251c0b2f7Stbbdev 16351c0b2f7Stbbdev // Check that the address is in the right distance. 16451c0b2f7Stbbdev // VirtualAlloc can only round the address down; so it will remain in the right distance 16551c0b2f7Stbbdev if (!IsInDistance(addr, Ptr2Addrint(newAddr), MAX_DISTANCE)) 16651c0b2f7Stbbdev break; 16751c0b2f7Stbbdev 16851c0b2f7Stbbdev if (VirtualQuery(newAddr, &memInfo, sizeof(memInfo)) != sizeof(memInfo)) 16951c0b2f7Stbbdev break; 17051c0b2f7Stbbdev 17151c0b2f7Stbbdev if (memInfo.State == MEM_FREE && memInfo.RegionSize >= m_allocSize) 17251c0b2f7Stbbdev { 17351c0b2f7Stbbdev // Found a free region, try to allocate a page in this region 17451c0b2f7Stbbdev void *newPage = VirtualAlloc(newAddr, m_allocSize, MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE); 17551c0b2f7Stbbdev if (!newPage) 17651c0b2f7Stbbdev break; 17751c0b2f7Stbbdev 17851c0b2f7Stbbdev // Add the new page to the pages database 17951c0b2f7Stbbdev MemoryBuffer *pBuff = new (m_lastBuffer) MemoryBuffer(newPage, m_allocSize); 18051c0b2f7Stbbdev ++m_lastBuffer; 18151c0b2f7Stbbdev return pBuff; 18251c0b2f7Stbbdev } 18351c0b2f7Stbbdev } 18451c0b2f7Stbbdev 18551c0b2f7Stbbdev // Failed to find a buffer in the distance 18651c0b2f7Stbbdev return 0; 18751c0b2f7Stbbdev } 18851c0b2f7Stbbdev 18951c0b2f7Stbbdev public: 19051c0b2f7Stbbdev MemoryProvider() 19151c0b2f7Stbbdev { 19251c0b2f7Stbbdev SYSTEM_INFO sysInfo; 19351c0b2f7Stbbdev GetSystemInfo(&sysInfo); 19451c0b2f7Stbbdev m_allocSize = sysInfo.dwAllocationGranularity; 19551c0b2f7Stbbdev m_lastBuffer = &m_pages[0]; 19651c0b2f7Stbbdev } 19751c0b2f7Stbbdev 19851c0b2f7Stbbdev // We can't free the pages in the destructor because the trampolines 19951c0b2f7Stbbdev // are using these memory locations and a replaced function might be called 20051c0b2f7Stbbdev // after the destructor was called. 20151c0b2f7Stbbdev ~MemoryProvider() 20251c0b2f7Stbbdev { 20351c0b2f7Stbbdev } 20451c0b2f7Stbbdev 20551c0b2f7Stbbdev // Return a memory location in distance less than 2^31 from input address 20651c0b2f7Stbbdev UINT_PTR GetLocation(UINT_PTR addr) 20751c0b2f7Stbbdev { 20851c0b2f7Stbbdev MemoryBuffer *pBuff = m_pages; 20951c0b2f7Stbbdev for (; pBuff<m_lastBuffer && IsInDistance(pBuff->m_next, addr, MAX_DISTANCE); ++pBuff) 21051c0b2f7Stbbdev { 21151c0b2f7Stbbdev if (pBuff->m_next < pBuff->m_base + pBuff->m_size) 21251c0b2f7Stbbdev { 21351c0b2f7Stbbdev UINT_PTR loc = pBuff->m_next; 21451c0b2f7Stbbdev pBuff->m_next += MAX_PROBE_SIZE; 21551c0b2f7Stbbdev return loc; 21651c0b2f7Stbbdev } 21751c0b2f7Stbbdev } 21851c0b2f7Stbbdev 21951c0b2f7Stbbdev pBuff = CreateBuffer(addr); 22051c0b2f7Stbbdev if(!pBuff) 22151c0b2f7Stbbdev return 0; 22251c0b2f7Stbbdev 22351c0b2f7Stbbdev UINT_PTR loc = pBuff->m_next; 22451c0b2f7Stbbdev pBuff->m_next += MAX_PROBE_SIZE; 22551c0b2f7Stbbdev return loc; 22651c0b2f7Stbbdev } 22751c0b2f7Stbbdev 22851c0b2f7Stbbdev private: 22951c0b2f7Stbbdev MemoryBuffer m_pages[MAX_NUM_BUFFERS]; 23051c0b2f7Stbbdev MemoryBuffer *m_lastBuffer; 23151c0b2f7Stbbdev DWORD m_allocSize; 23251c0b2f7Stbbdev }; 23351c0b2f7Stbbdev 23451c0b2f7Stbbdev static MemoryProvider memProvider; 23551c0b2f7Stbbdev 23651c0b2f7Stbbdev // Compare opcodes from dictionary (str1) and opcodes from code (str2) 23751c0b2f7Stbbdev // str1 might contain '*' to mask addresses 23851c0b2f7Stbbdev // RETURN: 0 if opcodes did not match, 1 on success 23951c0b2f7Stbbdev size_t compareStrings( const char *str1, const char *str2 ) 24051c0b2f7Stbbdev { 24151c0b2f7Stbbdev for (size_t i=0; str1[i]!=0; i++){ 24251c0b2f7Stbbdev if( str1[i]!='*' && str1[i]!='#' && str1[i]!=str2[i] ) return 0; 24351c0b2f7Stbbdev } 24451c0b2f7Stbbdev return 1; 24551c0b2f7Stbbdev } 24651c0b2f7Stbbdev 24751c0b2f7Stbbdev // Check function prologue with known prologues from the dictionary 24851c0b2f7Stbbdev // opcodes - dictionary 24951c0b2f7Stbbdev // inpAddr - pointer to function prologue 25051c0b2f7Stbbdev // Dictionary contains opcodes for several full asm instructions 25151c0b2f7Stbbdev // + one opcode byte for the next asm instruction for safe address processing 25251c0b2f7Stbbdev // RETURN: 1 + the index of the matched pattern, or 0 if no match found. 25351c0b2f7Stbbdev static UINT CheckOpcodes( const char ** opcodes, void *inpAddr, bool abortOnError, const FunctionInfo* functionInfo = NULL) 25451c0b2f7Stbbdev { 25551c0b2f7Stbbdev static size_t opcodesStringsCount = 0; 25651c0b2f7Stbbdev static size_t maxOpcodesLength = 0; 25751c0b2f7Stbbdev static size_t opcodes_pointer = (size_t)opcodes; 25851c0b2f7Stbbdev char opcodeString[2*MAX_PATTERN_SIZE+1]; 25951c0b2f7Stbbdev size_t i; 26051c0b2f7Stbbdev size_t result = 0; 26151c0b2f7Stbbdev 26251c0b2f7Stbbdev // Get the values for static variables 26351c0b2f7Stbbdev // max length and number of patterns 26451c0b2f7Stbbdev if( !opcodesStringsCount || opcodes_pointer != (size_t)opcodes ){ 26551c0b2f7Stbbdev while( *(opcodes + opcodesStringsCount)!= NULL ){ 26651c0b2f7Stbbdev if( (i=strlen(*(opcodes + opcodesStringsCount))) > maxOpcodesLength ) 26751c0b2f7Stbbdev maxOpcodesLength = i; 26851c0b2f7Stbbdev opcodesStringsCount++; 26951c0b2f7Stbbdev } 27051c0b2f7Stbbdev opcodes_pointer = (size_t)opcodes; 27151c0b2f7Stbbdev __TBB_ASSERT( maxOpcodesLength/2 <= MAX_PATTERN_SIZE, "Pattern exceeded the limit of 28 opcodes/56 symbols" ); 27251c0b2f7Stbbdev } 27351c0b2f7Stbbdev 27451c0b2f7Stbbdev // Translate prologue opcodes to string format to compare 27551c0b2f7Stbbdev for( i=0; i<maxOpcodesLength/2 && i<MAX_PATTERN_SIZE; ++i ){ 27651c0b2f7Stbbdev sprintf( opcodeString + 2*i, "%.2X", *((unsigned char*)inpAddr+i) ); 27751c0b2f7Stbbdev } 27851c0b2f7Stbbdev opcodeString[2*i] = 0; 27951c0b2f7Stbbdev 28051c0b2f7Stbbdev // Compare translated opcodes with patterns 28151c0b2f7Stbbdev for( UINT idx=0; idx<opcodesStringsCount; ++idx ){ 28251c0b2f7Stbbdev result = compareStrings( opcodes[idx],opcodeString ); 28351c0b2f7Stbbdev if( result ) { 28451c0b2f7Stbbdev if (functionInfo) { 28551c0b2f7Stbbdev Log::record(*functionInfo, opcodeString, /*status*/ true); 28651c0b2f7Stbbdev } 28751c0b2f7Stbbdev return idx + 1; // avoid 0 which indicates a failure 28851c0b2f7Stbbdev } 28951c0b2f7Stbbdev } 29051c0b2f7Stbbdev if (functionInfo) { 29151c0b2f7Stbbdev Log::record(*functionInfo, opcodeString, /*status*/ false); 29251c0b2f7Stbbdev } 29351c0b2f7Stbbdev if (abortOnError) { 29451c0b2f7Stbbdev // Impossibility to find opcodes in the dictionary is a serious issue, 29551c0b2f7Stbbdev // as if we unable to call original function, leak or crash is expected result. 29651c0b2f7Stbbdev __TBB_ASSERT_RELEASE( false, "CheckOpcodes failed" ); 29751c0b2f7Stbbdev } 29851c0b2f7Stbbdev return 0; 29951c0b2f7Stbbdev } 30051c0b2f7Stbbdev 30151c0b2f7Stbbdev // Modify offsets in original code after moving it to a trampoline. 30251c0b2f7Stbbdev // We do not have more than one offset to correct in existing opcode patterns. 30351c0b2f7Stbbdev static void CorrectOffset( UINT_PTR address, const char* pattern, UINT distance ) 30451c0b2f7Stbbdev { 30551c0b2f7Stbbdev const char* pos = strstr(pattern, "#*******"); 30651c0b2f7Stbbdev if( pos ) { 30751c0b2f7Stbbdev address += (pos - pattern)/2; // compute the offset position 30851c0b2f7Stbbdev UINT value; 30951c0b2f7Stbbdev // UINT assignment is not used to avoid potential alignment issues 31051c0b2f7Stbbdev memcpy(&value, Addrint2Ptr(address), sizeof(value)); 31151c0b2f7Stbbdev value += distance; 31251c0b2f7Stbbdev memcpy(Addrint2Ptr(address), &value, sizeof(value)); 31351c0b2f7Stbbdev } 31451c0b2f7Stbbdev } 31551c0b2f7Stbbdev 31651c0b2f7Stbbdev // Insert jump relative instruction to the input address 31751c0b2f7Stbbdev // RETURN: the size of the trampoline or 0 on failure 31851c0b2f7Stbbdev static DWORD InsertTrampoline32(void *inpAddr, void *targetAddr, const char* pattern, void** storedAddr) 31951c0b2f7Stbbdev { 32051c0b2f7Stbbdev size_t bytesToMove = SIZE_OF_RELJUMP; 32151c0b2f7Stbbdev UINT_PTR srcAddr = Ptr2Addrint(inpAddr); 32251c0b2f7Stbbdev UINT_PTR tgtAddr = Ptr2Addrint(targetAddr); 32351c0b2f7Stbbdev // Check that the target fits in 32 bits 32451c0b2f7Stbbdev if (!IsInDistance(srcAddr, tgtAddr, MAX_DISTANCE)) 32551c0b2f7Stbbdev return 0; 32651c0b2f7Stbbdev 32751c0b2f7Stbbdev UINT_PTR offset; 32851c0b2f7Stbbdev UINT offset32; 32951c0b2f7Stbbdev UCHAR *codePtr = (UCHAR *)inpAddr; 33051c0b2f7Stbbdev 33151c0b2f7Stbbdev if ( storedAddr ){ // If requested, store original function code 33251c0b2f7Stbbdev bytesToMove = strlen(pattern)/2-1; // The last byte matching the pattern must not be copied 33351c0b2f7Stbbdev __TBB_ASSERT_RELEASE( bytesToMove >= SIZE_OF_RELJUMP, "Incorrect bytecode pattern?" ); 33451c0b2f7Stbbdev UINT_PTR trampAddr = memProvider.GetLocation(srcAddr); 33551c0b2f7Stbbdev if (!trampAddr) 33651c0b2f7Stbbdev return 0; 33751c0b2f7Stbbdev *storedAddr = Addrint2Ptr(trampAddr); 33851c0b2f7Stbbdev // Set 'executable' flag for original instructions in the new place 33951c0b2f7Stbbdev DWORD pageFlags = PAGE_EXECUTE_READWRITE; 34051c0b2f7Stbbdev if (!VirtualProtect(*storedAddr, MAX_PROBE_SIZE, pageFlags, &pageFlags)) return 0; 34151c0b2f7Stbbdev // Copy original instructions to the new place 34251c0b2f7Stbbdev memcpy(*storedAddr, codePtr, bytesToMove); 34351c0b2f7Stbbdev offset = srcAddr - trampAddr; 34451c0b2f7Stbbdev offset32 = (UINT)(offset & 0xFFFFFFFF); 34551c0b2f7Stbbdev CorrectOffset( trampAddr, pattern, offset32 ); 34651c0b2f7Stbbdev // Set jump to the code after replacement 34751c0b2f7Stbbdev offset32 -= SIZE_OF_RELJUMP; 34851c0b2f7Stbbdev *(UCHAR*)(trampAddr+bytesToMove) = 0xE9; 34951c0b2f7Stbbdev memcpy((UCHAR*)(trampAddr+bytesToMove+1), &offset32, sizeof(offset32)); 35051c0b2f7Stbbdev } 35151c0b2f7Stbbdev 35251c0b2f7Stbbdev // The following will work correctly even if srcAddr>tgtAddr, as long as 35351c0b2f7Stbbdev // address difference is less than 2^31, which is guaranteed by IsInDistance. 35451c0b2f7Stbbdev offset = tgtAddr - srcAddr - SIZE_OF_RELJUMP; 35551c0b2f7Stbbdev offset32 = (UINT)(offset & 0xFFFFFFFF); 35651c0b2f7Stbbdev // Insert the jump to the new code 35751c0b2f7Stbbdev *codePtr = 0xE9; 35851c0b2f7Stbbdev memcpy(codePtr+1, &offset32, sizeof(offset32)); 35951c0b2f7Stbbdev 36051c0b2f7Stbbdev // Fill the rest with NOPs to correctly see disassembler of old code in debugger. 36151c0b2f7Stbbdev for( unsigned i=SIZE_OF_RELJUMP; i<bytesToMove; i++ ){ 36251c0b2f7Stbbdev *(codePtr+i) = 0x90; 36351c0b2f7Stbbdev } 36451c0b2f7Stbbdev 36551c0b2f7Stbbdev return SIZE_OF_RELJUMP; 36651c0b2f7Stbbdev } 36751c0b2f7Stbbdev 36851c0b2f7Stbbdev // This function is called when the offset doesn't fit in 32 bits 36951c0b2f7Stbbdev // 1 Find and allocate a page in the small distance (<2^31) from input address 37051c0b2f7Stbbdev // 2 Put jump RIP relative indirect through the address in the close page 37151c0b2f7Stbbdev // 3 Put the absolute address of the target in the allocated location 37251c0b2f7Stbbdev // RETURN: the size of the trampoline or 0 on failure 37351c0b2f7Stbbdev static DWORD InsertTrampoline64(void *inpAddr, void *targetAddr, const char* pattern, void** storedAddr) 37451c0b2f7Stbbdev { 37551c0b2f7Stbbdev size_t bytesToMove = SIZE_OF_INDJUMP; 37651c0b2f7Stbbdev 37751c0b2f7Stbbdev UINT_PTR srcAddr = Ptr2Addrint(inpAddr); 37851c0b2f7Stbbdev UINT_PTR tgtAddr = Ptr2Addrint(targetAddr); 37951c0b2f7Stbbdev 38051c0b2f7Stbbdev // Get a location close to the source address 38151c0b2f7Stbbdev UINT_PTR location = memProvider.GetLocation(srcAddr); 38251c0b2f7Stbbdev if (!location) 38351c0b2f7Stbbdev return 0; 38451c0b2f7Stbbdev 38551c0b2f7Stbbdev UINT_PTR offset; 38651c0b2f7Stbbdev UINT offset32; 38751c0b2f7Stbbdev UCHAR *codePtr = (UCHAR *)inpAddr; 38851c0b2f7Stbbdev 38951c0b2f7Stbbdev // Fill the location 39051c0b2f7Stbbdev UINT_PTR *locPtr = (UINT_PTR *)Addrint2Ptr(location); 39151c0b2f7Stbbdev *locPtr = tgtAddr; 39251c0b2f7Stbbdev 39351c0b2f7Stbbdev if ( storedAddr ){ // If requested, store original function code 39451c0b2f7Stbbdev bytesToMove = strlen(pattern)/2-1; // The last byte matching the pattern must not be copied 39551c0b2f7Stbbdev __TBB_ASSERT_RELEASE( bytesToMove >= SIZE_OF_INDJUMP, "Incorrect bytecode pattern?" ); 39651c0b2f7Stbbdev UINT_PTR trampAddr = memProvider.GetLocation(srcAddr); 39751c0b2f7Stbbdev if (!trampAddr) 39851c0b2f7Stbbdev return 0; 39951c0b2f7Stbbdev *storedAddr = Addrint2Ptr(trampAddr); 40051c0b2f7Stbbdev // Set 'executable' flag for original instructions in the new place 40151c0b2f7Stbbdev DWORD pageFlags = PAGE_EXECUTE_READWRITE; 40251c0b2f7Stbbdev if (!VirtualProtect(*storedAddr, MAX_PROBE_SIZE, pageFlags, &pageFlags)) return 0; 40351c0b2f7Stbbdev // Copy original instructions to the new place 40451c0b2f7Stbbdev memcpy(*storedAddr, codePtr, bytesToMove); 40551c0b2f7Stbbdev offset = srcAddr - trampAddr; 40651c0b2f7Stbbdev offset32 = (UINT)(offset & 0xFFFFFFFF); 40751c0b2f7Stbbdev CorrectOffset( trampAddr, pattern, offset32 ); 40851c0b2f7Stbbdev // Set jump to the code after replacement. It is within the distance of relative jump! 40951c0b2f7Stbbdev offset32 -= SIZE_OF_RELJUMP; 41051c0b2f7Stbbdev *(UCHAR*)(trampAddr+bytesToMove) = 0xE9; 41151c0b2f7Stbbdev memcpy((UCHAR*)(trampAddr+bytesToMove+1), &offset32, sizeof(offset32)); 41251c0b2f7Stbbdev } 41351c0b2f7Stbbdev 41451c0b2f7Stbbdev // Fill the buffer 41551c0b2f7Stbbdev offset = location - srcAddr - SIZE_OF_INDJUMP; 41651c0b2f7Stbbdev offset32 = (UINT)(offset & 0xFFFFFFFF); 41751c0b2f7Stbbdev *(codePtr) = 0xFF; 41851c0b2f7Stbbdev *(codePtr+1) = 0x25; 41951c0b2f7Stbbdev memcpy(codePtr+2, &offset32, sizeof(offset32)); 42051c0b2f7Stbbdev 42151c0b2f7Stbbdev // Fill the rest with NOPs to correctly see disassembler of old code in debugger. 42251c0b2f7Stbbdev for( unsigned i=SIZE_OF_INDJUMP; i<bytesToMove; i++ ){ 42351c0b2f7Stbbdev *(codePtr+i) = 0x90; 42451c0b2f7Stbbdev } 42551c0b2f7Stbbdev 42651c0b2f7Stbbdev return SIZE_OF_INDJUMP; 42751c0b2f7Stbbdev } 42851c0b2f7Stbbdev 42951c0b2f7Stbbdev // Insert a jump instruction in the inpAddr to the targetAddr 43051c0b2f7Stbbdev // 1. Get the memory protection of the page containing the input address 43151c0b2f7Stbbdev // 2. Change the memory protection to writable 43251c0b2f7Stbbdev // 3. Call InsertTrampoline32 or InsertTrampoline64 43351c0b2f7Stbbdev // 4. Restore memory protection 43451c0b2f7Stbbdev // RETURN: FALSE on failure, TRUE on success 43551c0b2f7Stbbdev static bool InsertTrampoline(void *inpAddr, void *targetAddr, const char ** opcodes, void** origFunc) 43651c0b2f7Stbbdev { 43751c0b2f7Stbbdev DWORD probeSize; 43851c0b2f7Stbbdev // Change page protection to EXECUTE+WRITE 43951c0b2f7Stbbdev DWORD origProt = 0; 44051c0b2f7Stbbdev if (!VirtualProtect(inpAddr, MAX_PROBE_SIZE, PAGE_EXECUTE_WRITECOPY, &origProt)) 44151c0b2f7Stbbdev return FALSE; 44251c0b2f7Stbbdev 44351c0b2f7Stbbdev const char* pattern = NULL; 44451c0b2f7Stbbdev if ( origFunc ){ // Need to store original function code 44551c0b2f7Stbbdev UCHAR * const codePtr = (UCHAR *)inpAddr; 44651c0b2f7Stbbdev if ( *codePtr == 0xE9 ){ // JMP relative instruction 44751c0b2f7Stbbdev // For the special case when a system function consists of a single near jump, 44851c0b2f7Stbbdev // instead of moving it somewhere we use the target of the jump as the original function. 44951c0b2f7Stbbdev unsigned offsetInJmp = *(unsigned*)(codePtr + 1); 45051c0b2f7Stbbdev *origFunc = (void*)(Ptr2Addrint(inpAddr) + offsetInJmp + SIZE_OF_RELJUMP); 45151c0b2f7Stbbdev origFunc = NULL; // now it must be ignored by InsertTrampoline32/64 45251c0b2f7Stbbdev } else { 45351c0b2f7Stbbdev // find the right opcode pattern 45451c0b2f7Stbbdev UINT opcodeIdx = CheckOpcodes( opcodes, inpAddr, /*abortOnError=*/true ); 45551c0b2f7Stbbdev __TBB_ASSERT( opcodeIdx > 0, "abortOnError ignored in CheckOpcodes?" ); 45651c0b2f7Stbbdev pattern = opcodes[opcodeIdx-1]; // -1 compensates for +1 in CheckOpcodes 45751c0b2f7Stbbdev } 45851c0b2f7Stbbdev } 45951c0b2f7Stbbdev 46051c0b2f7Stbbdev probeSize = InsertTrampoline32(inpAddr, targetAddr, pattern, origFunc); 46151c0b2f7Stbbdev if (!probeSize) 46251c0b2f7Stbbdev probeSize = InsertTrampoline64(inpAddr, targetAddr, pattern, origFunc); 46351c0b2f7Stbbdev 46451c0b2f7Stbbdev // Restore original protection 46551c0b2f7Stbbdev VirtualProtect(inpAddr, MAX_PROBE_SIZE, origProt, &origProt); 46651c0b2f7Stbbdev 46751c0b2f7Stbbdev if (!probeSize) 46851c0b2f7Stbbdev return FALSE; 46951c0b2f7Stbbdev 47051c0b2f7Stbbdev FlushInstructionCache(GetCurrentProcess(), inpAddr, probeSize); 47151c0b2f7Stbbdev FlushInstructionCache(GetCurrentProcess(), origFunc, probeSize); 47251c0b2f7Stbbdev 47351c0b2f7Stbbdev return TRUE; 47451c0b2f7Stbbdev } 47551c0b2f7Stbbdev 47651c0b2f7Stbbdev // Routine to replace the functions 47751c0b2f7Stbbdev // TODO: replace opcodesNumber with opcodes and opcodes number to check if we replace right code. 47851c0b2f7Stbbdev FRR_TYPE ReplaceFunctionA(const char *dllName, const char *funcName, FUNCPTR newFunc, const char ** opcodes, FUNCPTR* origFunc) 47951c0b2f7Stbbdev { 48051c0b2f7Stbbdev // Cache the results of the last search for the module 48151c0b2f7Stbbdev // Assume that there was no DLL unload between 48251c0b2f7Stbbdev static char cachedName[MAX_PATH+1]; 48351c0b2f7Stbbdev static HMODULE cachedHM = 0; 48451c0b2f7Stbbdev 48551c0b2f7Stbbdev if (!dllName || !*dllName) 48651c0b2f7Stbbdev return FRR_NODLL; 48751c0b2f7Stbbdev 48851c0b2f7Stbbdev if (!cachedHM || strncmp(dllName, cachedName, MAX_PATH) != 0) 48951c0b2f7Stbbdev { 49051c0b2f7Stbbdev // Find the module handle for the input dll 49151c0b2f7Stbbdev HMODULE hModule = GetModuleHandleA(dllName); 49251c0b2f7Stbbdev if (hModule == 0) 49351c0b2f7Stbbdev { 49451c0b2f7Stbbdev // Couldn't find the module with the input name 49551c0b2f7Stbbdev cachedHM = 0; 49651c0b2f7Stbbdev return FRR_NODLL; 49751c0b2f7Stbbdev } 49851c0b2f7Stbbdev 49951c0b2f7Stbbdev cachedHM = hModule; 50051c0b2f7Stbbdev strncpy(cachedName, dllName, MAX_PATH); 50151c0b2f7Stbbdev } 50251c0b2f7Stbbdev 50351c0b2f7Stbbdev FARPROC inpFunc = GetProcAddress(cachedHM, funcName); 50451c0b2f7Stbbdev if (inpFunc == 0) 50551c0b2f7Stbbdev { 50651c0b2f7Stbbdev // Function was not found 50751c0b2f7Stbbdev return FRR_NOFUNC; 50851c0b2f7Stbbdev } 50951c0b2f7Stbbdev 51051c0b2f7Stbbdev if (!InsertTrampoline((void*)inpFunc, (void*)newFunc, opcodes, (void**)origFunc)){ 51151c0b2f7Stbbdev // Failed to insert the trampoline to the target address 51251c0b2f7Stbbdev return FRR_FAILED; 51351c0b2f7Stbbdev } 51451c0b2f7Stbbdev 51551c0b2f7Stbbdev return FRR_OK; 51651c0b2f7Stbbdev } 51751c0b2f7Stbbdev 51851c0b2f7Stbbdev FRR_TYPE ReplaceFunctionW(const wchar_t *dllName, const char *funcName, FUNCPTR newFunc, const char ** opcodes, FUNCPTR* origFunc) 51951c0b2f7Stbbdev { 52051c0b2f7Stbbdev // Cache the results of the last search for the module 52151c0b2f7Stbbdev // Assume that there was no DLL unload between 52251c0b2f7Stbbdev static wchar_t cachedName[MAX_PATH+1]; 52351c0b2f7Stbbdev static HMODULE cachedHM = 0; 52451c0b2f7Stbbdev 52551c0b2f7Stbbdev if (!dllName || !*dllName) 52651c0b2f7Stbbdev return FRR_NODLL; 52751c0b2f7Stbbdev 52851c0b2f7Stbbdev if (!cachedHM || wcsncmp(dllName, cachedName, MAX_PATH) != 0) 52951c0b2f7Stbbdev { 53051c0b2f7Stbbdev // Find the module handle for the input dll 53151c0b2f7Stbbdev HMODULE hModule = GetModuleHandleW(dllName); 53251c0b2f7Stbbdev if (hModule == 0) 53351c0b2f7Stbbdev { 53451c0b2f7Stbbdev // Couldn't find the module with the input name 53551c0b2f7Stbbdev cachedHM = 0; 53651c0b2f7Stbbdev return FRR_NODLL; 53751c0b2f7Stbbdev } 53851c0b2f7Stbbdev 53951c0b2f7Stbbdev cachedHM = hModule; 54051c0b2f7Stbbdev wcsncpy(cachedName, dllName, MAX_PATH); 54151c0b2f7Stbbdev } 54251c0b2f7Stbbdev 54351c0b2f7Stbbdev FARPROC inpFunc = GetProcAddress(cachedHM, funcName); 54451c0b2f7Stbbdev if (inpFunc == 0) 54551c0b2f7Stbbdev { 54651c0b2f7Stbbdev // Function was not found 54751c0b2f7Stbbdev return FRR_NOFUNC; 54851c0b2f7Stbbdev } 54951c0b2f7Stbbdev 55051c0b2f7Stbbdev if (!InsertTrampoline((void*)inpFunc, (void*)newFunc, opcodes, (void**)origFunc)){ 55151c0b2f7Stbbdev // Failed to insert the trampoline to the target address 55251c0b2f7Stbbdev return FRR_FAILED; 55351c0b2f7Stbbdev } 55451c0b2f7Stbbdev 55551c0b2f7Stbbdev return FRR_OK; 55651c0b2f7Stbbdev } 55751c0b2f7Stbbdev 55851c0b2f7Stbbdev bool IsPrologueKnown(const char* dllName, const char *funcName, const char **opcodes, HMODULE module) 55951c0b2f7Stbbdev { 56051c0b2f7Stbbdev FARPROC inpFunc = GetProcAddress(module, funcName); 56151c0b2f7Stbbdev FunctionInfo functionInfo = { funcName, dllName }; 56251c0b2f7Stbbdev 56351c0b2f7Stbbdev if (!inpFunc) { 56451c0b2f7Stbbdev Log::record(functionInfo, "unknown", /*status*/ false); 56551c0b2f7Stbbdev return false; 56651c0b2f7Stbbdev } 56751c0b2f7Stbbdev 56851c0b2f7Stbbdev return CheckOpcodes( opcodes, (void*)inpFunc, /*abortOnError=*/false, &functionInfo) != 0; 56951c0b2f7Stbbdev } 57051c0b2f7Stbbdev 57151c0b2f7Stbbdev // Public Windows API 57251c0b2f7Stbbdev extern "C" __declspec(dllexport) int TBB_malloc_replacement_log(char *** function_replacement_log_ptr) 57351c0b2f7Stbbdev { 57451c0b2f7Stbbdev if (function_replacement_log_ptr != NULL) { 57551c0b2f7Stbbdev *function_replacement_log_ptr = Log::records; 57651c0b2f7Stbbdev } 57751c0b2f7Stbbdev 57851c0b2f7Stbbdev // If we have no logs -> return false status 57951c0b2f7Stbbdev return Log::replacement_status && Log::records[0] != NULL ? 0 : -1; 58051c0b2f7Stbbdev } 58151c0b2f7Stbbdev 58251c0b2f7Stbbdev #endif /* !__TBB_WIN8UI_SUPPORT && defined(_WIN32) */ 583