Windows和pthread中提供的自旋锁

程序员文章站 2022-09-28 08:36:31

Windows和POSIX中都提供了自旋锁，我们也可以通过C++11的atomic来实现自旋锁。那么两者性能上面是什么关系？先引入实现代码：下面给出一个简单测试，两组线程，一组用来插入，另外一组用来取出。测试结果显示：（1）无论是Windows，还是POSIX提供的C语言版本的自旋锁，都和C++ ......

Windows和POSIX中都提供了自旋锁，我们也可以通过C++11的atomic来实现自旋锁。那么系统提供的自旋锁与基于atomic实现的自旋锁，在性能上有什么差别？先引入实现代码：

#ifndef __spinlock_h__
#define __spinlock_h__

#include <atomic>

#ifdef _WIN32

#include <Windows.h>

/// Lock type built on a Win32 CRITICAL_SECTION initialised with a spin count.
///
/// Provides lock(), try_lock() and unlock(), so it can be used with
/// std::lock_guard. The object is neither copyable nor assignable because it
/// owns a CRITICAL_SECTION, which must stay at a fixed address and must be
/// deleted exactly once.
///
/// NOTE(review): per the Win32 documentation a critical section may be
/// re-entered by the thread that already owns it, so try_lock() from the
/// owning thread is expected to succeed — confirm this is acceptable for callers.
class spinlock_mutex
{
public:
    /// Spin count handed to InitializeCriticalSectionAndSpinCount.
    /// -1 converts to the largest DWORD value.
    /// NOTE(review): the system may clamp or mask this value — confirm against
    /// the API documentation.
    static constexpr DWORD SPINLOCK_COUNT = static_cast<DWORD>(-1);

public:
    /// Initialises the critical section.
    /// Initialisation can fail when resources are low; that failure is
    /// deliberately ignored here. See "Critical Sections and Error Handling"
    /// in "Windows via C/C++".
    spinlock_mutex()
    {
        InitializeCriticalSectionAndSpinCount(&m_cs, SPINLOCK_COUNT);
    }

    /// Releases the resources held by the critical section.
    ~spinlock_mutex()
    {
        DeleteCriticalSection(&m_cs);
    }

    // Copying would duplicate the CRITICAL_SECTION and delete it twice.
    spinlock_mutex(const spinlock_mutex&) = delete;
    spinlock_mutex& operator=(const spinlock_mutex&) = delete;

    /// Blocks until the calling thread owns the critical section.
    void lock()
    {
        EnterCriticalSection(&m_cs);
    }

    /// Attempts to enter the critical section without blocking.
    /// @return true if the critical section was entered.
    bool try_lock()
    {
        // Success is reported as "nonzero", not necessarily the constant TRUE.
        return TryEnterCriticalSection(&m_cs) != 0;
    }

    /// Leaves the critical section.
    void unlock()
    {
        LeaveCriticalSection(&m_cs);
    }

private:
    CRITICAL_SECTION m_cs;
};

#elif defined(_POSIX_C_SOURCE)

#include <pthread.h>

/// Lock type built on a POSIX pthread_spinlock_t.
///
/// Provides lock(), try_lock() and unlock(), so it can be used with
/// std::lock_guard. The object is neither copyable nor assignable: a copy
/// would duplicate the underlying pthread_spinlock_t and destroy it twice.
class spinlock_mutex
{
public:
    /// Initialises the spin lock as process-private.
    /// Errors returned by the pthread call are deliberately not handled here.
    spinlock_mutex()
    {
        pthread_spin_init(&m_cs, PTHREAD_PROCESS_PRIVATE);
    }

    /// Destroys the spin lock.
    ~spinlock_mutex()
    {
        pthread_spin_destroy(&m_cs);
    }

    // The underlying lock object must not be copied.
    spinlock_mutex(const spinlock_mutex&) = delete;
    spinlock_mutex& operator=(const spinlock_mutex&) = delete;

    /// Acquires the lock via pthread_spin_lock.
    void lock()
    {
        pthread_spin_lock(&m_cs);
    }

    /// Attempts to acquire the lock without waiting.
    /// @return true if pthread_spin_trylock reported success (returned 0).
    bool try_lock()
    {
        return pthread_spin_trylock(&m_cs) == 0;
    }

    /// Releases the lock.
    void unlock()
    {
        pthread_spin_unlock(&m_cs);
    }

private:
    pthread_spinlock_t m_cs;
};

#else

/// Portable spin lock built on std::atomic_flag.
///
/// Provides lock(), try_lock() and unlock(), so it can be used with
/// std::lock_guard. std::atomic_flag is not copyable, so neither is this class.
class spinlock_mutex
{
    // Copy-initialisation is the form in which ATOMIC_FLAG_INIT is guaranteed
    // to leave the flag in the clear state.
    std::atomic_flag flag = ATOMIC_FLAG_INIT;

public:
    /// Creates the lock in the unlocked (clear) state.
    spinlock_mutex() = default;

    spinlock_mutex(const spinlock_mutex&) = delete;
    spinlock_mutex& operator=(const spinlock_mutex&) = delete;

    /// Busy-waits until the flag could be set by this caller.
    void lock()
    {
        while (flag.test_and_set(std::memory_order_acquire))
        {
            // Spin: the flag was already set by another holder.
        }
    }

    /// Clears the flag with release ordering, handing the lock back.
    void unlock()
    {
        flag.clear(std::memory_order_release);
    }

    /// Attempts to take the lock once, without spinning.
    /// @return true if the flag was clear before the call (lock acquired).
    bool try_lock()
    {
        return !flag.test_and_set(std::memory_order_acquire);
    }
};

#endif


#endif    // __spinlock_h__

下面给出一个简单测试，两组线程，一组用来插入，另外一组用来取出。测试结果显示：

（1）无论是Windows，还是POSIX提供的C语言版本的自旋锁，都和C++11使用atomic构建的自旋锁效率相近。

（2）在插入线程数和取出线程数相同的情况下，线程数越多，效率越低。

下面是测试代码：

#include <cassert>
#include <chrono>
#include <forward_list>
#include <future>
#include <iostream>
#include <memory>
#include <mutex>
#include <random>
#include <thread>
#include <vector>

#include "spinlock.h"


/// Singly linked node that carries one age value.
struct student_name
{
    int age;                       ///< Age stored in this node.
    student_name* next = nullptr;  ///< Following node; a new node has none.

    /// Builds an unlinked node holding @p age (0 when omitted).
    student_name(int age = 0)
        : age(age)
    {
    }
};



// Shared list of ages, plus the lock the worker threads take around it.
spinlock_mutex g_mtx;
std::forward_list<int> g_students;

// Counters, all starting at zero.
std::atomic<int> g_inserts{0};   // number of successful inserts
std::atomic<int> g_drops{0};     // number of successful drops

std::atomic<int> g_printNum{0};  // incremented together with g_drops

std::atomic<long long> g_ageInSum{0};   // sum of ages added on the insert side
std::atomic<long long> g_ageOutSum{0};  // sum of ages added on the drop side

// Polled by the drop threads; main() stores false to stop them.
std::atomic<bool> goOn{true};

constexpr int INSERT_THREAD_NUM = 1;
constexpr int DROP_THREAD_NUM = 1;

// Number of items each insert thread produces.
// NOTE(review): the original remark advised keeping this small enough that
// roughly 2,000,000 * 100 * 10 (items * age bound * threads) stays near
// INT_MAX; the age sums above are long long — confirm whether that limit
// still applies.
constexpr int ONE_THREAD_PRODUCE_NUM = 5000000;

/// Accounts for one consumed node and frees it.
///
/// Bumps g_printNum and g_drops by one, adds the node's age to g_ageOutSum,
/// then deletes the node. @p t is dereferenced unconditionally.
inline void printOne(student_name* t)
{
    // Take ownership so the node is deleted when this function returns.
    std::unique_ptr<student_name> node(t);

    g_printNum.fetch_add(1, std::memory_order_relaxed);
    g_ageOutSum.fetch_add(node->age, std::memory_order_relaxed);
    g_drops.fetch_add(1, std::memory_order_relaxed);
}

/// Producer: pushes ONE_THREAD_PRODUCE_NUM random ages (1..99) onto g_students.
///
/// Each age is added to g_ageInSum before the push, and g_inserts is
/// incremented after it. Only the push itself runs under g_mtx.
///
/// @param idNo index of this producer; mixed into the random seed so that
///             producers started at the same moment draw different sequences.
void insert_students(int idNo)
{
    // Seeding every thread from the wall clock gives identical sequences to
    // threads started within the same second; combine a random_device value
    // with the thread index instead.
    std::seed_seq seed{ static_cast<unsigned int>(std::random_device{}()),
                        static_cast<unsigned int>(idNo) };
    std::default_random_engine dre(seed);
    std::uniform_int_distribution<int> ageDi(1, 99);

    for (int i = 0; i < ONE_THREAD_PRODUCE_NUM; ++i)
    {
        const int newAge = ageDi(dre);
        g_ageInSum.fetch_add(newAge, std::memory_order_relaxed);

        {
            // Keep the critical section down to the list insertion.
            std::lock_guard<spinlock_mutex> lock(g_mtx);
            g_students.push_front(newAge);
        }

        // Relaxed ordering: this is a plain counter and should not add
        // ordering constraints of its own.
        g_inserts.fetch_add(1, std::memory_order_relaxed);
    }
}

void drop_students(int idNo)
{
    while (auto go = goOn.load(std::memory_order_consume))
    {
        {
            std::forward_list<int> tmp;
            {
                std::lock_guard<spinlock_mutex> lock(g_mtx);
                std::swap(g_students, tmp);
            }
            auto it = tmp.begin();
            while (it != tmp.end())
            {
                g_printNum.fetch_add(1, std::memory_order_relaxed);
                g_ageOutSum.fetch_add(*it, std::memory_order_relaxed);
                g_drops.fetch_add(1, std::memory_order_relaxed);
                ++it;
            }
        }
    }
}

/// Benchmark driver.
///
/// Starts INSERT_THREAD_NUM producers and DROP_THREAD_NUM consumers, waits for
/// the producers, stops the consumers, drains whatever is left in g_students,
/// and prints the elapsed time together with the counters.
int main()
{
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by adjustments of the system clock.
    const auto start = std::chrono::steady_clock::now();

    std::vector<std::future<void>> insert_threads;
    std::vector<std::future<void>> drop_threads;
    insert_threads.reserve(INSERT_THREAD_NUM);
    drop_threads.reserve(DROP_THREAD_NUM);

    for (int i = 0; i != INSERT_THREAD_NUM; ++i)
    {
        insert_threads.push_back(std::async(std::launch::async, insert_students, i));
    }

    for (int i = 0; i != DROP_THREAD_NUM; ++i)
    {
        drop_threads.push_back(std::async(std::launch::async, drop_students, i));
    }

    // Wait until every producer has finished.
    for (auto& producer : insert_threads)
    {
        producer.get();
    }

    // Give the consumers one more second before stopping them.
    // Note: this second is part of the elapsed time reported below.
    std::this_thread::sleep_for(std::chrono::milliseconds(1000));

    goOn.store(false, std::memory_order_release);

    for (auto& consumer : drop_threads)
    {
        consumer.get();
    }

    // Final drain: account for anything still left in the shared list.
    {
        std::forward_list<int> leftovers;
        {
            std::lock_guard<spinlock_mutex> lock(g_mtx);
            std::swap(g_students, leftovers);
        }

        for (const int age : leftovers)
        {
            g_printNum.fetch_add(1, std::memory_order_relaxed);
            g_ageOutSum.fetch_add(age, std::memory_order_relaxed);
            g_drops.fetch_add(1, std::memory_order_relaxed);
        }
    }

    const auto end = std::chrono::steady_clock::now();
    const std::chrono::duration<double> diff = end - start;
    std::cout << "Time to insert and drop is: " << diff.count() << " s\n";

    std::cout << "insert count1: " << g_inserts.load() << '\n';
    std::cout << "drop count1: " << g_drops.load() << '\n';
    std::cout << "print num1: " << g_printNum.load() << '\n';

    std::cout << "age in1: " << g_ageInSum.load() << '\n';
    std::cout << "age out1: " << g_ageOutSum.load() << '\n';

    // Single flush at the end instead of one per line.
    std::cout << std::endl;
}

关于自旋锁，还有以下内容需要说明：

（1）应用层使用spinlock的最大问题是不能像kernel那样关中断（cli/sti）。假设并发稍微多一点，线程1在lock之后、unlock之前发生了时钟中断而被切换出去，要过一段时间才会被切回来调用unlock，那么在这段时间里，其他调用lock的线程就只能在while循环里空转，这才是最浪费CPU时间的地方。既然不能关中断，就只能改用sleep（让出CPU）的方式等待，但无论采用哪种方式，都存在巨大的冲突代价。

（2）具体参考：https://www.zhihu.com/question/55764216

上一篇：刘邦临死为什么要杀这三个人杀他们的原因是什么

下一篇： oracle学习笔记（九） SQL常用函数说明以及使用