用户态中断定时器访问,例如通过 KeQueryInterruptTime (或类似)

发布于 2024-12-17 14:18:37 字数 721 浏览 0 评论 0原文

是否有“Nt”或类似的(即非内核模式驱动程序)函数相当于KeQueryInterruptTime或类似的东西?好像没有NtQueryInterruptTime这样的东西,至少我没有发现。

我想要的是某种相当准确、可靠、单调的计时器(因此不是 QPC),它相当有效,并且不会像溢出的 32 位计数器那样带来意外,并且没有不必要的“智能” ,没有时区,也没有复杂的结构。

因此,理想情况下,我想要类似 timeGetTime 的 64 位值。它甚至不必是同一个计时器。
从 Vista 开始就存在 GetTickCount64 ,这本身是可以接受的,但我不想仅仅因为这样一个愚蠢的原因就破坏 XP 支持。

读取 0x7FFE0008 处的四字,如此处所示...好吧,有效 ...并且它证明实际的内部计数器在 XP 下确实是 64 位(它也是尽可能快的),但是呃……我们先不讨论读取某些未知的硬编码内存位置是一种多么令人讨厌的黑客行为。

在调用人为愚弄的(将 64 位计数器缩小到 32 位)高级 API 函数和读取原始内存地址之间肯定存在某些东西?

Is there a "Nt" or similar (i.e. non-kernelmode-driver) function equivalent for KeQueryInterruptTime or anything similar? There seems to be no such thing as NtQueryInterruptTime, at least I've not found it.

What I want is some kind of reasonably accurate and reliable, monotonic timer (thus not QPC) which is reasonably efficient and doesn't have surprises as an overflowing 32-bit counter, and no unnecessary "smartness", no time zones, or complicated structures.

So ideally, I want something like timeGetTime with a 64 bit value. It doesn't even have to be the same timer.
There exists GetTickCount64 starting with Vista, which would be acceptable as such, but I'd not like to break XP support only for such a stupid reason.

Reading the quadword at 0x7FFE0008 as indicated here ... well, works ... and it proves that indeed the actual internal counter is 64 bits under XP (it's also as fast as it could possibly get), but meh... let's not talk about what a kind of nasty hack it is to read some unknown, hardcoded memory location.

There must certainly be something in between calling an artificially stupefied (scaling a 64 bit counter down to 32 bits) high-level API function and reading a raw memory address?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(3

旧街凉风 2024-12-24 14:18:37

下面是 GetTickCount() 的线程安全包装器示例,它将刻度计数值扩展到 64 位,并且等效于 GetTickCount64()。

为了避免意外的计数器翻转,请确保每 49.7 天调用此函数几次。您甚至可以有一个专用线程,其唯一目的是调用此函数,然后在无限循环中休眠大约 20 天。

ULONGLONG MyGetTickCount64(void)
{
  static volatile LONGLONG Count = 0;
  LONGLONG curCount1, curCount2;
  LONGLONG tmp;

  curCount1 = InterlockedCompareExchange64(&Count, 0, 0);

  curCount2 = curCount1 & 0xFFFFFFFF00000000;
  curCount2 |= GetTickCount();

  if ((ULONG)curCount2 < (ULONG)curCount1)
  {
    curCount2 += 0x100000000;
  }

  tmp = InterlockedCompareExchange64(&Count, curCount2, curCount1);

  if (tmp == curCount1)
  {
    return curCount2;
  }
  else
  {
    return tmp;
  }
}

编辑:这是一个测试 MyGetTickCount64() 的完整应用程序。

// Compiled with Open Watcom C 1.9: wcl386.exe /we /wx /q gettick.c

#include <windows.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>

//
// The below code is an ugly implementation of InterlockedCompareExchange64()
// that is apparently missing in Open Watcom C 1.9.
// It must work with MSVC++ too, however.
//
UINT8 Cmpxchg8bData[] =
{
  0x55,             // push      ebp
  0x89, 0xE5,       // mov       ebp, esp
  0x57,             // push      edi
  0x51,             // push      ecx
  0x53,             // push      ebx
  0x8B, 0x7D, 0x10, // mov       edi, [ebp + 0x10]
  0x8B, 0x07,       // mov       eax, [edi]
  0x8B, 0x57, 0x04, // mov       edx, [edi + 0x4]
  0x8B, 0x7D, 0x0C, // mov       edi, [ebp + 0xc]
  0x8B, 0x1F,       // mov       ebx, [edi]
  0x8B, 0x4F, 0x04, // mov       ecx, [edi + 0x4]
  0x8B, 0x7D, 0x08, // mov       edi, [ebp + 0x8]
  0xF0,             // lock:
  0x0F, 0xC7, 0x0F, // cmpxchg8b [edi]
  0x5B,             // pop       ebx
  0x59,             // pop       ecx
  0x5F,             // pop       edi
  0x5D,             // pop       ebp
  0xC3              // ret
};

LONGLONG (__cdecl *Cmpxchg8b)(LONGLONG volatile* Dest, LONGLONG* Exch, LONGLONG* Comp) =
  (LONGLONG (__cdecl *)(LONGLONG volatile*, LONGLONG*, LONGLONG*))Cmpxchg8bData;

LONGLONG MyInterlockedCompareExchange64(LONGLONG volatile* Destination,
                                        LONGLONG Exchange,
                                        LONGLONG Comparand)
{
  return Cmpxchg8b(Destination, &Exchange, &Comparand);
}

#ifdef InterlockedCompareExchange64
#undef InterlockedCompareExchange64
#endif

#define InterlockedCompareExchange64(Destination, Exchange, Comparand) \
  MyInterlockedCompareExchange64(Destination, Exchange, Comparand)

//
// This stuff makes a thread-safe printf().
// We don't want characters output by one thread to be mixed
// with characters output by another. We want printf() to be
// "atomic".
// We use a critical section around vprintf() to achieve "atomicity".
//
static CRITICAL_SECTION PrintfCriticalSection;

int ts_printf(const char* Format, ...)
{
  int count;
  va_list ap;

  EnterCriticalSection(&PrintfCriticalSection);

  va_start(ap, Format);
  count = vprintf(Format, ap);
  va_end(ap);

  LeaveCriticalSection(&PrintfCriticalSection);

  return count;
}

#define TICK_COUNT_10MS_INCREMENT 0x800000

//
// This is the simulated tick counter.
// Its low 32 bits are going to be returned by
// our, simulated, GetTickCount().
//
// TICK_COUNT_10MS_INCREMENT is what the counter is
// incremented by every time. The value is so chosen
// that the counter quickly overflows in its
// low 32 bits.
//
static volatile LONGLONG SimulatedTickCount = 0;

//
// This is our simulated 32-bit GetTickCount()
// that returns a count that often overflows.
//
ULONG SimulatedGetTickCount(void)
{
  return (ULONG)SimulatedTickCount;
}

//
// This thread function will increment the simulated tick counter
// whose value's low 32 bits we'll be reading in SimulatedGetTickCount().
//
DWORD WINAPI SimulatedTickThread(LPVOID lpParameter)
{
  UNREFERENCED_PARAMETER(lpParameter);

  for (;;)
  {
    LONGLONG c;

    Sleep(10);

    // Get the counter value, add TICK_COUNT_10MS_INCREMENT to it and
    // store the result back.
    c = InterlockedCompareExchange64(&SimulatedTickCount, 0, 0);
    InterlockedCompareExchange64(&SimulatedTickCount, c + TICK_COUNT_10MS_INCREMENT, c) != c);
  }

  return 0;
}

volatile LONG CountOfObserved32bitOverflows = 0;
volatile LONG CountOfObservedUpdateRaces = 0;

//
// This prints statistics that includes the true 64-bit value of
// SimulatedTickCount that we can't get from SimulatedGetTickCount() as it
// returns only its lower 32 bits.
//
// The stats also include:
// - the number of times that MyGetTickCount64() observes an overflow of
//   SimulatedGetTickCount()
// - the number of times MyGetTickCount64() fails to update its internal
//   counter because of a concurrent update in another thread.
//
void PrintStats(void)
{
  LONGLONG true64bitCounter = InterlockedCompareExchange64(&SimulatedTickCount, 0, 0);

  ts_printf("  0x%08X`%08X <- true 64-bit count; ovfs: ~%d; races: %d\n",
            (ULONG)(true64bitCounter >> 32),
            (ULONG)true64bitCounter,
            CountOfObserved32bitOverflows,
            CountOfObservedUpdateRaces);
}

//
// This is our poor man's implementation of GetTickCount64()
// on top of GetTickCount().
//
// It's thread safe.
//
// When used with actual GetTickCount() instead of SimulatedGetTickCount()
// it must be called at least a few times in 49.7 days to ensure that
// it doesn't miss any overflows in GetTickCount()'s return value.
//
ULONGLONG MyGetTickCount64(void)
{
  static volatile LONGLONG Count = 0;
  LONGLONG curCount1, curCount2;
  LONGLONG tmp;

  curCount1 = InterlockedCompareExchange64(&Count, 0, 0);

  curCount2 = curCount1 & 0xFFFFFFFF00000000;
  curCount2 |= SimulatedGetTickCount();

  if ((ULONG)curCount2 < (ULONG)curCount1)
  {
    curCount2 += 0x100000000;

    InterlockedIncrement(&CountOfObserved32bitOverflows);
  }

  tmp = InterlockedCompareExchange64(&Count, curCount2, curCount1);

  if (tmp != curCount1)
  {
    curCount2 = tmp;

    InterlockedIncrement(&CountOfObservedUpdateRaces);
  }

  return curCount2;
}

//
// This is an error counter. If a thread that uses MyGetTickCount64() notices
// any problem with what MyGetTickCount64() returns, it bumps up this error
// counter and stops. If one of threads sees a non-zero value in this
// counter due to an error in another thread, it stops as well.
//
volatile LONG Error = 0;

//
// This is a thread function that will be using MyGetTickCount64(),
// validating its return value and printing some stats once in a while.
//
// This function is meant to execute concurrently in multiple threads
// to create race conditions inside of MyGetTickCount64() and test it.
//
DWORD WINAPI TickUserThread(LPVOID lpParameter)
{
  DWORD user = (DWORD)lpParameter; // thread number
  ULONGLONG ticks[4];

  ticks[3] = ticks[2] = ticks[1] = MyGetTickCount64();

  while (!Error)
  {
    ticks[0] = ticks[1];
    ticks[1] = MyGetTickCount64();

    // Every ~100 ms sleep a little (slightly lowers CPU load, to about 90%)
    if (ticks[1] > ticks[2] + TICK_COUNT_10MS_INCREMENT * 10L)
    {
      ticks[2] = ticks[1];
      Sleep(1 + rand() % 20);
    }

    // Every ~1000 ms print the last value from MyGetTickCount64().
    // Thread 1 also prints stats here.
    if (ticks[1] > ticks[3] + TICK_COUNT_10MS_INCREMENT * 100L)
    {
      ticks[3] = ticks[1];
      ts_printf("%u:0x%08X`%08X\n", user, (ULONG)(ticks[1] >> 32), (ULONG)ticks[1]);

      if (user == 1)
      {
        PrintStats();
      }
    }

    if (ticks[0] > ticks[1])
    {
      ts_printf("%u:Non-monotonic tick counts: 0x%016llX > 0x%016llX!\n",
                user,
                ticks[0],
                ticks[1]);
      PrintStats();
      InterlockedIncrement(&Error);
      return -1;
    }
    else if (ticks[0] + 0x100000000 <= ticks[1])
    {
      ts_printf("%u:Too big tick count jump: 0x%016llX -> 0x%016llX!\n",
                user,
                ticks[0],
                ticks[1]);
      PrintStats();
      InterlockedIncrement(&Error);
      return -1;
    }

    Sleep(0); // be nice, yield to other threads.
  }

  return 0;
}

//
// This prints stats upon Ctrl+C and terminates the program.
//
BOOL WINAPI ConsoleEventHandler(DWORD Event)
{
  if (Event == CTRL_C_EVENT)
  {
    PrintStats();
  }

  return FALSE;
}

int main(void)
{
  HANDLE simulatedTickThreadHandle;
  HANDLE tickUserThreadHandle;
  DWORD dummy;

  // This is for the missing InterlockedCompareExchange64() workaround.
  VirtualProtect(Cmpxchg8bData, sizeof(Cmpxchg8bData), PAGE_EXECUTE_READWRITE, &dummy);

  InitializeCriticalSection(&PrintfCriticalSection);

  if (!SetConsoleCtrlHandler(&ConsoleEventHandler, TRUE))
  {
    ts_printf("SetConsoleCtrlHandler(&ConsoleEventHandler) failed with error 0x%X\n", GetLastError());
    return -1;
  }

  // Start the tick simulator thread.

  simulatedTickThreadHandle = CreateThread(NULL, 0, &SimulatedTickThread, NULL, 0, NULL);

  if (simulatedTickThreadHandle == NULL)
  {
    ts_printf("CreateThread(&SimulatedTickThread) failed with error 0x%X\n", GetLastError());
    return -1;
  }

  // Start one thread that'll be using MyGetTickCount64().

  tickUserThreadHandle = CreateThread(NULL, 0, &TickUserThread, (LPVOID)2, 0, NULL);
  if (tickUserThreadHandle == NULL)
  {
    ts_printf("CreateThread(&TickUserThread) failed with error 0x%X\n", GetLastError());
    return -1;
  }

  // The other thread using MyGetTickCount64() will be the main thread.

  TickUserThread((LPVOID)1);

  //
  // The app terminates upon any error condition detected in TickUserThread()
  // in any of the threads or by Ctrl+C.
  //

  return 0;
}

作为测试,我在 Windows XP 下在一台具有 2 个 CPU 的闲置计算机上运行此测试应用程序 5 个多小时(闲置,以避免潜在的长时间饥饿时间,从而避免丢失每 5 秒发生一次的计数器溢出),它是仍然表现良好。

以下是控制台的最新输出:

2:0x00000E1B`C8800000
1:0x00000E1B`FA800000
  0x00000E1B`FA800000 <- true 64-bit count; ovfs: ~3824; races: 110858

如您所见,MyGetTickCount64() 已观察到 3824 个 32 位溢出,并且无法使用其第二个 更新 Count 的值>InterlockedCompareExchange64() 110858 次。因此,确实发生了溢出,最后一个数字意味着该变量实际上是由两个线程同时更新的。

您还可以看到,两个线程从 TickUserThread() 中的 MyGetTickCount64() 接收的 64 位刻度计数在前 32 位中没有丢失任何内容,并且非常接近 SimulatedTickCount 中的实际 64 位刻度计数,其 32 个低位由 SimulatedGetTickCount() 返回。由于线程调度和不频繁的统计打印,0x00000E1BC8800000 在视觉上落后于 0x00000E1BFA800000,它落后了 100*TICK_COUNT_10MS_INCREMENT,即 1 秒。当然,在内部,差异要小得多。

现在,关于 InterlockedCompareExchange64() 的可用性...有点奇怪,它是 自 Windows Vista 和 Windows Server 2003 起正式提供。 Server 2003 实际上是基于与 Windows XP 相同的代码库构建的。

但这里最重要的是,该函数建立在 Pentium CMPXCHG8B 指令之上,该指令自 1998 年或更早以来就可用 (1), (2)。我可以在 Windows XP (SP3) 二进制文件中看到这条指令。它位于 ntkrnlpa.exe/ntoskrnl.exe(内核)和 ntdll.dll(导出内核的 NtXxxx() 函数的 DLL)中。建立在)。查找 0xF0、0x0F、0xC7 字节序列,并反汇编该位置周围的代码,看看这些字节是否巧合存在。

您可以通过CPUID指令(CPUID函数0x00000001和函数0x80000001的EDX位8)检查该指令的可用性,如果该指令不存在,则拒绝运行而不是崩溃,但现在您'不太可能找到不支持该指令的机器。如果您这样做,那么它就不是一台适用于 Windows XP 的好机器,而且可能也不适用于您的应用程序。

Here's an example of a thread-safe wrapper for GetTickCount() extending the tick count value to 64 bits and in that being equivalent to GetTickCount64().

To avoid undesired counter roll overs, make sure to call this function a few times every 49.7 days. You can even have a dedicated thread whose only purpose would be to call this function and then sleep some 20 days in an infinite loop.

ULONGLONG MyGetTickCount64(void)
{
  static volatile LONGLONG Count = 0;
  LONGLONG curCount1, curCount2;
  LONGLONG tmp;

  curCount1 = InterlockedCompareExchange64(&Count, 0, 0);

  curCount2 = curCount1 & 0xFFFFFFFF00000000;
  curCount2 |= GetTickCount();

  if ((ULONG)curCount2 < (ULONG)curCount1)
  {
    curCount2 += 0x100000000;
  }

  tmp = InterlockedCompareExchange64(&Count, curCount2, curCount1);

  if (tmp == curCount1)
  {
    return curCount2;
  }
  else
  {
    return tmp;
  }
}

EDIT: And here's a complete application that tests MyGetTickCount64().

// Compiled with Open Watcom C 1.9: wcl386.exe /we /wx /q gettick.c

#include <windows.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>

//
// The below code is an ugly implementation of InterlockedCompareExchange64()
// that is apparently missing in Open Watcom C 1.9.
// It must work with MSVC++ too, however.
//
UINT8 Cmpxchg8bData[] =
{
  0x55,             // push      ebp
  0x89, 0xE5,       // mov       ebp, esp
  0x57,             // push      edi
  0x51,             // push      ecx
  0x53,             // push      ebx
  0x8B, 0x7D, 0x10, // mov       edi, [ebp + 0x10]
  0x8B, 0x07,       // mov       eax, [edi]
  0x8B, 0x57, 0x04, // mov       edx, [edi + 0x4]
  0x8B, 0x7D, 0x0C, // mov       edi, [ebp + 0xc]
  0x8B, 0x1F,       // mov       ebx, [edi]
  0x8B, 0x4F, 0x04, // mov       ecx, [edi + 0x4]
  0x8B, 0x7D, 0x08, // mov       edi, [ebp + 0x8]
  0xF0,             // lock:
  0x0F, 0xC7, 0x0F, // cmpxchg8b [edi]
  0x5B,             // pop       ebx
  0x59,             // pop       ecx
  0x5F,             // pop       edi
  0x5D,             // pop       ebp
  0xC3              // ret
};

LONGLONG (__cdecl *Cmpxchg8b)(LONGLONG volatile* Dest, LONGLONG* Exch, LONGLONG* Comp) =
  (LONGLONG (__cdecl *)(LONGLONG volatile*, LONGLONG*, LONGLONG*))Cmpxchg8bData;

LONGLONG MyInterlockedCompareExchange64(LONGLONG volatile* Destination,
                                        LONGLONG Exchange,
                                        LONGLONG Comparand)
{
  return Cmpxchg8b(Destination, &Exchange, &Comparand);
}

#ifdef InterlockedCompareExchange64
#undef InterlockedCompareExchange64
#endif

#define InterlockedCompareExchange64(Destination, Exchange, Comparand) \
  MyInterlockedCompareExchange64(Destination, Exchange, Comparand)

//
// This stuff makes a thread-safe printf().
// We don't want characters output by one thread to be mixed
// with characters output by another. We want printf() to be
// "atomic".
// We use a critical section around vprintf() to achieve "atomicity".
//
static CRITICAL_SECTION PrintfCriticalSection;

int ts_printf(const char* Format, ...)
{
  int count;
  va_list ap;

  EnterCriticalSection(&PrintfCriticalSection);

  va_start(ap, Format);
  count = vprintf(Format, ap);
  va_end(ap);

  LeaveCriticalSection(&PrintfCriticalSection);

  return count;
}

#define TICK_COUNT_10MS_INCREMENT 0x800000

//
// This is the simulated tick counter.
// Its low 32 bits are going to be returned by
// our, simulated, GetTickCount().
//
// TICK_COUNT_10MS_INCREMENT is what the counter is
// incremented by every time. The value is so chosen
// that the counter quickly overflows in its
// low 32 bits.
//
static volatile LONGLONG SimulatedTickCount = 0;

//
// This is our simulated 32-bit GetTickCount()
// that returns a count that often overflows.
//
ULONG SimulatedGetTickCount(void)
{
  return (ULONG)SimulatedTickCount;
}

//
// This thread function will increment the simulated tick counter
// whose value's low 32 bits we'll be reading in SimulatedGetTickCount().
//
DWORD WINAPI SimulatedTickThread(LPVOID lpParameter)
{
  UNREFERENCED_PARAMETER(lpParameter);

  for (;;)
  {
    LONGLONG c;

    Sleep(10);

    // Get the counter value, add TICK_COUNT_10MS_INCREMENT to it and
    // store the result back.
    c = InterlockedCompareExchange64(&SimulatedTickCount, 0, 0);
    InterlockedCompareExchange64(&SimulatedTickCount, c + TICK_COUNT_10MS_INCREMENT, c) != c);
  }

  return 0;
}

volatile LONG CountOfObserved32bitOverflows = 0;
volatile LONG CountOfObservedUpdateRaces = 0;

//
// This prints statistics that includes the true 64-bit value of
// SimulatedTickCount that we can't get from SimulatedGetTickCount() as it
// returns only its lower 32 bits.
//
// The stats also include:
// - the number of times that MyGetTickCount64() observes an overflow of
//   SimulatedGetTickCount()
// - the number of times MyGetTickCount64() fails to update its internal
//   counter because of a concurrent update in another thread.
//
void PrintStats(void)
{
  LONGLONG true64bitCounter = InterlockedCompareExchange64(&SimulatedTickCount, 0, 0);

  ts_printf("  0x%08X`%08X <- true 64-bit count; ovfs: ~%d; races: %d\n",
            (ULONG)(true64bitCounter >> 32),
            (ULONG)true64bitCounter,
            CountOfObserved32bitOverflows,
            CountOfObservedUpdateRaces);
}

//
// This is our poor man's implementation of GetTickCount64()
// on top of GetTickCount().
//
// It's thread safe.
//
// When used with actual GetTickCount() instead of SimulatedGetTickCount()
// it must be called at least a few times in 49.7 days to ensure that
// it doesn't miss any overflows in GetTickCount()'s return value.
//
ULONGLONG MyGetTickCount64(void)
{
  static volatile LONGLONG Count = 0;
  LONGLONG curCount1, curCount2;
  LONGLONG tmp;

  curCount1 = InterlockedCompareExchange64(&Count, 0, 0);

  curCount2 = curCount1 & 0xFFFFFFFF00000000;
  curCount2 |= SimulatedGetTickCount();

  if ((ULONG)curCount2 < (ULONG)curCount1)
  {
    curCount2 += 0x100000000;

    InterlockedIncrement(&CountOfObserved32bitOverflows);
  }

  tmp = InterlockedCompareExchange64(&Count, curCount2, curCount1);

  if (tmp != curCount1)
  {
    curCount2 = tmp;

    InterlockedIncrement(&CountOfObservedUpdateRaces);
  }

  return curCount2;
}

//
// This is an error counter. If a thread that uses MyGetTickCount64() notices
// any problem with what MyGetTickCount64() returns, it bumps up this error
// counter and stops. If one of threads sees a non-zero value in this
// counter due to an error in another thread, it stops as well.
//
volatile LONG Error = 0;

//
// This is a thread function that will be using MyGetTickCount64(),
// validating its return value and printing some stats once in a while.
//
// This function is meant to execute concurrently in multiple threads
// to create race conditions inside of MyGetTickCount64() and test it.
//
DWORD WINAPI TickUserThread(LPVOID lpParameter)
{
  DWORD user = (DWORD)lpParameter; // thread number
  ULONGLONG ticks[4];

  ticks[3] = ticks[2] = ticks[1] = MyGetTickCount64();

  while (!Error)
  {
    ticks[0] = ticks[1];
    ticks[1] = MyGetTickCount64();

    // Every ~100 ms sleep a little (slightly lowers CPU load, to about 90%)
    if (ticks[1] > ticks[2] + TICK_COUNT_10MS_INCREMENT * 10L)
    {
      ticks[2] = ticks[1];
      Sleep(1 + rand() % 20);
    }

    // Every ~1000 ms print the last value from MyGetTickCount64().
    // Thread 1 also prints stats here.
    if (ticks[1] > ticks[3] + TICK_COUNT_10MS_INCREMENT * 100L)
    {
      ticks[3] = ticks[1];
      ts_printf("%u:0x%08X`%08X\n", user, (ULONG)(ticks[1] >> 32), (ULONG)ticks[1]);

      if (user == 1)
      {
        PrintStats();
      }
    }

    if (ticks[0] > ticks[1])
    {
      ts_printf("%u:Non-monotonic tick counts: 0x%016llX > 0x%016llX!\n",
                user,
                ticks[0],
                ticks[1]);
      PrintStats();
      InterlockedIncrement(&Error);
      return -1;
    }
    else if (ticks[0] + 0x100000000 <= ticks[1])
    {
      ts_printf("%u:Too big tick count jump: 0x%016llX -> 0x%016llX!\n",
                user,
                ticks[0],
                ticks[1]);
      PrintStats();
      InterlockedIncrement(&Error);
      return -1;
    }

    Sleep(0); // be nice, yield to other threads.
  }

  return 0;
}

//
// This prints stats upon Ctrl+C and terminates the program.
//
BOOL WINAPI ConsoleEventHandler(DWORD Event)
{
  if (Event == CTRL_C_EVENT)
  {
    PrintStats();
  }

  return FALSE;
}

int main(void)
{
  HANDLE simulatedTickThreadHandle;
  HANDLE tickUserThreadHandle;
  DWORD dummy;

  // This is for the missing InterlockedCompareExchange64() workaround.
  VirtualProtect(Cmpxchg8bData, sizeof(Cmpxchg8bData), PAGE_EXECUTE_READWRITE, &dummy);

  InitializeCriticalSection(&PrintfCriticalSection);

  if (!SetConsoleCtrlHandler(&ConsoleEventHandler, TRUE))
  {
    ts_printf("SetConsoleCtrlHandler(&ConsoleEventHandler) failed with error 0x%X\n", GetLastError());
    return -1;
  }

  // Start the tick simulator thread.

  simulatedTickThreadHandle = CreateThread(NULL, 0, &SimulatedTickThread, NULL, 0, NULL);

  if (simulatedTickThreadHandle == NULL)
  {
    ts_printf("CreateThread(&SimulatedTickThread) failed with error 0x%X\n", GetLastError());
    return -1;
  }

  // Start one thread that'll be using MyGetTickCount64().

  tickUserThreadHandle = CreateThread(NULL, 0, &TickUserThread, (LPVOID)2, 0, NULL);
  if (tickUserThreadHandle == NULL)
  {
    ts_printf("CreateThread(&TickUserThread) failed with error 0x%X\n", GetLastError());
    return -1;
  }

  // The other thread using MyGetTickCount64() will be the main thread.

  TickUserThread((LPVOID)1);

  //
  // The app terminates upon any error condition detected in TickUserThread()
  // in any of the threads or by Ctrl+C.
  //

  return 0;
}

As a test I've been running this test app under Windows XP for 5+ hours on an otherwise idle machine that has 2 CPUs (idle, to avoid potential long starvation times and therefore avoid missing counter overflows that occur every 5 seconds) and it's still doing well.

Here's the latest output from the console:

2:0x00000E1B`C8800000
1:0x00000E1B`FA800000
  0x00000E1B`FA800000 <- true 64-bit count; ovfs: ~3824; races: 110858

As you can see, MyGetTickCount64() has observed 3824 32-bit overflows and failed to update the value of Count with its second InterlockedCompareExchange64() 110858 times. So, overflows indeed occur and the last number means that the variable is, in fact, being concurrently updated by the two threads.

You can also see that the 64-bit tick counts that the two threads receive from MyGetTickCount64() in TickUserThread() don't have anything missing in the top 32 bits and are pretty close to the actual 64-bit tick count in SimulatedTickCount, whose 32 low bits are returned by SimulatedGetTickCount(). 0x00000E1BC8800000 is visually behind 0x00000E1BFA800000 due to thread scheduling and infrequent stat prints, it's behind by exactly 100*TICK_COUNT_10MS_INCREMENT, or 1 second. Internally, of course, the difference is much smaller.

Now, on availability of InterlockedCompareExchange64()... It's a bit odd that it's officially available since Windows Vista and Windows Server 2003. Server 2003 is in fact build from the same code base as Windows XP.

But the most important thing here is that this function is built on top of the Pentium CMPXCHG8B instruction that's been available since 1998 or earlier (1), (2). And I can see this instruction in my Windows XP's (SP3) binaries. It's in ntkrnlpa.exe/ntoskrnl.exe (the kernel) and ntdll.dll (the DLL that exports kernel's NtXxxx() functions that everything's built upon). Look for a byte sequence of 0xF0, 0x0F, 0xC7 and disassemble the code around that place to see that these bytes aren't there coincidentally.

You can check availability of this instruction through the CPUID instruction (EDX bit 8 of CPUID function 0x00000001 and function 0x80000001) and refuse to run instead of crashing if the instruction isn't there, but these days you're unlikely to find a machine that doesn't support this instruction. If you do, it won't be a good machine for Windows XP and probably your application as well anyways.

在梵高的星空下 2024-12-24 14:18:37

感谢 Google 图书免费提供相关文献,我想出了一个简单快速的 GetTickCount64 实现,它在 Vista 之前的系统上也运行得很好(而且它仍然比从硬编码内存地址读取值)。

事实上,它就像调用中断 0x2A 一样简单,它映射到 KiGetTickCount。在 GCC 内联汇编中,这给出:

static __inline__ __attribute__((always_inline)) unsigned long long get_tick_count64()
{
    unsigned long long ret;
    __asm__ __volatile__ ("int $0x2a" : "=A"(ret) : : );
    return ret;
}

由于 KiGetTickCount 的工作方式,该函数可能最好称为 GetTickCount46,因为它执行右移 18,返回 46 位,而不是 64。尽管原始 Vista 版本也是如此。

请注意,KiGetTickCount 会破坏 edx,如果您打算实现自己的 32 位版本的更快实现(必须将 edx 添加到在这种情况下就是破坏列表!)。

Thanks to Google Books which kindly offered the relevant literature for free, I came up with an easy and fast implementation of GetTickCount64 which works perfectly well on pre-Vista systems too (and it still is somewhat less nasty than reading a value from a hardcoded memory address).

It is in fact as easy as calling interrupt 0x2A, which maps to KiGetTickCount. In GCC inline assembly, this gives:

static __inline__ __attribute__((always_inline)) unsigned long long get_tick_count64()
{
    unsigned long long ret;
    __asm__ __volatile__ ("int $0x2a" : "=A"(ret) : : );
    return ret;
}

Due to the way KiGetTickCount works, the function should probably better be called GetTickCount46, as it performs a right shift by 18, returning 46 bits, not 64. Though the same is true for the original Vista version, too.

Note that KiGetTickCount clobbers edx, this is relevant if you plan to implement your own faster implementation of the 32-bit version (must add edx to the clobber list in that case!).

街道布景 2024-12-24 14:18:37

这是另一种方法,Alex 包装器的变体,但仅使用 32 位互锁。它实际上只返回一个 60 位数字,但这在大约三千六百万年里仍然有效。 :-)

确实需要更频繁地调用它,至少每三天一次。这通常不应该是一个主要缺点。

ULONGLONG MyTickCount64(void)
{
    static volatile DWORD count = 0xFFFFFFFF;
    DWORD previous_count, current_tick32, previous_count_zone, current_tick32_zone;
    ULONGLONG current_tick64;

    previous_count = InterlockedCompareExchange(&count, 0, 0);
    current_tick32 = GetTickCount();

    if (previous_count == 0xFFFFFFFF)
    {
        // count has never been written
        DWORD initial_count;
        initial_count = current_tick32 >> 28;
        previous_count = InterlockedCompareExchange(&count, initial_count, 0xFFFFFFFF);

        if (previous_count == 0xFFFFFFFF)
        {   // This thread wrote the initial value for count
            previous_count = initial_count;
        }
        else if (previous_count != initial_count)
        {   // Another thread wrote the initial value for count,
            // and it differs from the one we calculated
            current_tick32 = GetTickCount();
        }
    }

    previous_count_zone = previous_count & 15;
    current_tick32_zone = current_tick32 >> 28;

    if (current_tick32_zone == previous_count_zone)
    {
        // The top four bits of the 32-bit tick count haven't changed since count was last written.
        current_tick64 = previous_count;
        current_tick64 <<= 28;
        current_tick64 += current_tick32 & 0x0FFFFFFF;
        return current_tick64;
    }

    if (current_tick32_zone == previous_count_zone + 1 || (current_tick32_zone == 0 && previous_count_zone == 15))
    {
        // The top four bits of the 32-bit tick count have been incremented since count was last written.
        InterlockedCompareExchange(&count, previous_count + 1, previous_count);
        current_tick64 = previous_count + 1;
        current_tick64 <<= 28;
        current_tick64 += current_tick32 & 0x0FFFFFFF;
        return current_tick64;
    }

    // Oops, we weren't called often enough, we're stuck
    return 0xFFFFFFFF;
}

Here's another approach, a variant of Alex's wrapper but using only 32-bit interlocks. It only actually returns a 60-bit number, but that's still good for about thirty-six million years. :-)

It does need to be called more often, at least once every three days. That shouldn't normally be a major drawback.

ULONGLONG MyTickCount64(void)
{
    static volatile DWORD count = 0xFFFFFFFF;
    DWORD previous_count, current_tick32, previous_count_zone, current_tick32_zone;
    ULONGLONG current_tick64;

    previous_count = InterlockedCompareExchange(&count, 0, 0);
    current_tick32 = GetTickCount();

    if (previous_count == 0xFFFFFFFF)
    {
        // count has never been written
        DWORD initial_count;
        initial_count = current_tick32 >> 28;
        previous_count = InterlockedCompareExchange(&count, initial_count, 0xFFFFFFFF);

        if (previous_count == 0xFFFFFFFF)
        {   // This thread wrote the initial value for count
            previous_count = initial_count;
        }
        else if (previous_count != initial_count)
        {   // Another thread wrote the initial value for count,
            // and it differs from the one we calculated
            current_tick32 = GetTickCount();
        }
    }

    previous_count_zone = previous_count & 15;
    current_tick32_zone = current_tick32 >> 28;

    if (current_tick32_zone == previous_count_zone)
    {
        // The top four bits of the 32-bit tick count haven't changed since count was last written.
        current_tick64 = previous_count;
        current_tick64 <<= 28;
        current_tick64 += current_tick32 & 0x0FFFFFFF;
        return current_tick64;
    }

    if (current_tick32_zone == previous_count_zone + 1 || (current_tick32_zone == 0 && previous_count_zone == 15))
    {
        // The top four bits of the 32-bit tick count have been incremented since count was last written.
        InterlockedCompareExchange(&count, previous_count + 1, previous_count);
        current_tick64 = previous_count + 1;
        current_tick64 <<= 28;
        current_tick64 += current_tick32 & 0x0FFFFFFF;
        return current_tick64;
    }

    // Oops, we weren't called often enough, we're stuck
    return 0xFFFFFFFF;
}
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文