// Compile-time factorial. A C++11 constexpr function had to be a single
// return statement; compiled as C++14 or later, an if statement is also fine.
constexpr int factorial(int n) {
    if (n <= 1) {
        return 1;
    }
    return n * factorial(n - 1);
}
// Loops and local variables are allowed in constexpr functions since C++14.
// Demonstrates C++14 relaxed constexpr: loops and mutable local variables.
// Returns i unchanged when i <= 1, otherwise the sum 0 + 1 + ... + (i - 1).
constexpr int loop(int i) {
    if (i <= 1) {
        return i;
    }
    int total = 0;
    for (int k = 1; k < i; ++k) { // k = 0 would only add zero, so start at 1
        total += k;
    }
    return total;
}
int main() {
    // Both values are computed entirely at compile time.
    constexpr auto fact5 = factorial(5);
    constexpr auto total = loop(10);
    // A constexpr value is a valid constant expression for an array bound.
    int array[fact5];
    std::cout << fact5 << "\n";
    std::cout << total << "\n";
}
int main() {
    std::vector<int> vec = {1, 2, 3, 4};
    // C++17 if-with-initializer: itr's scope is limited to the if statement.
    if (const std::vector<int>::iterator itr = std::find(vec.begin(), vec.end(), 3); itr != vec.end()) {
        *itr = 4;
    }
    // Since C++17 this can be simplified by using `auto`.
    if (auto itr = std::find(vec.begin(), vec.end(), 2); itr != vec.end()) {
        std::cout << *itr << "\n";
        *itr = 4;
    }
}
// Returns three heterogeneous values bundled in one tuple; callers can
// unpack them with structured bindings or std::tie.
std::tuple<int, double, std::string> f() {
    return {1, 2.3, "Hello"};
}
int main() {
    // C++17 structured bindings unpack the returned tuple in one declaration.
    const auto [x, y, z] = f();
    std::cout << x << ", " << y << ", " << z << "\n";
}
// Pre-C++11 style: the result type R must be supplied explicitly by the
// caller, because it cannot be deduced from the arguments alone.
template<typename R, typename T, typename U>
R add(T lhs, U rhs) {
    return lhs + rhs;
}
// C++11
// C++11: auto with a trailing return type; decltype(a + b) names the type
// of the mixed-type sum, so nothing has to be supplied by the caller.
template<typename T, typename U>
auto add2(T a, U b) -> decltype(a + b) {
    return a + b;
}
// C++14
// C++14: the return type is deduced directly from the return statement,
// no trailing return type needed.
template<typename T, typename U>
auto add3(T a, U b) {
    return a + b;
}
int main () {
    // Before C++11 the iterator type had to be spelled out in full.
    std::vector<int> vec = {1, 2, 3, 4};
    std::vector<int>::const_iterator it = vec.cbegin();
    // With auto the compiler deduces the very same type.
    auto it2 = vec.cbegin();
    auto i = 5;              // i deduced as int
    auto arr = new auto(10); // arr deduced as int*
    // decltype yields the declared type of an expression.
    auto x = 1;
    auto y = 2;
    decltype(x + y) z; // z has the type of x + y, i.e. int (value never read)
    if (std::is_same<decltype(x), int>::value)
        std::cout << "type x == int" << std::endl;
    if (std::is_same<decltype(x), float>::value)
        std::cout << "type x == float" << std::endl;
    if (std::is_same<decltype(x), decltype(z)>::value)
        std::cout << "type z == type x" << std::endl;
    // Tail type inference: explicit template arguments are still allowed.
    auto q = add3<double, int>(1.0, 2);
    std::cout << "q: " << q << "\n";
    delete arr; // release the demo allocation (was leaked before)
}
// Returns t + 1 for integral types and t + 0.001 otherwise. Because
// if constexpr (C++17) discards the untaken branch at compile time, the
// deduced return type can legally differ per instantiation.
template<typename T>
auto print_type_info(const T& t) {
    if constexpr (std::is_integral_v<T>) {
        return t + 1;
    } else {
        return t + 0.001;
    }
}
int main() {
# if constexpr
std::cout << print_type_info(5) << "\n";
std::cout << print_type_info(3.14) << "\n";
# ranged based for loop
std::vector<int> vec = {1, 2, 3, 4};
if (auto itr = std::find(vec.begin(), vec.end(), 3); itr != vec.end())
*itr = 4;
for (auto element : vec)
std::cout << element << "\n"; # read only
for (auto &element : vec) {
element += 1; # writeable
}
}
// Default parameters of the template:
// Default template arguments (C++11): both T and U fall back to int when
// they are neither deduced nor provided explicitly.
template<typename T = int, typename U = int>
auto add(T a, U b) -> decltype(a + b) {
    return a + b;
}
// Variadic templates (C++11): Magic accepts any number of type parameters,
// including zero.
template<typename... Ts> class Magic;
// Magic2 additionally requires at least one explicit leading parameter.
template<typename Require, typename... Args> class Magic2;
// Prints the number of arguments; sizeof...(Ts) counts the pack's elements.
template<typename... Ts>
void magic_func(Ts... args) {
    std::cout << sizeof...(Ts) << std::endl;
}
// Recursive template function
// Recursive variadic printing (C++11 style). The single-argument overload
// terminates the recursion.
template<typename T0>
void printf1(T0 value) {
    std::cout << value << std::endl;
}
// Peels one argument off the pack per call until only one remains, at which
// point the overload above is selected.
template<typename T, typename... Ts>
void printf1(T head, Ts... tail) {
    std::cout << head << "\n";
    printf1(tail...);
}
// Variadic parameter pack expansion
// C++17 variant: a single function that stops recursing via if constexpr
// when the pack is empty — no terminating overload needed.
template<typename T0, typename... T>
void printf2(T0 head, T... tail) {
    std::cout << head << "\n";
    if constexpr (sizeof...(tail) > 0) {
        printf2(tail...);
    }
}
// Initializer-list expansion
// Prints `value`, then each element of the pack, by expanding the pack
// inside a braced initializer list. Each list element is the comma
// expression `(print-one-arg-lambda(), value)`, so the lambdas run left to
// right while the list is built; the comma operator makes every element
// evaluate to `value`, giving the list a consistent element type T. The
// list itself is cast to void because only the side effects are wanted.
// NOTE(review): every printed pack element must be streamable; the list's
// element type is T (the first argument's type).
template<typename T, typename... Ts>
auto printf3(T value, Ts... args) {
std::cout << value << "\n";
(void) std::initializer_list<T>{ ([&args] {
std::cout << args << "\n";
}(), value)...};
}
// Fold expression
// C++17 unary right fold over operator+: expands to
// t1 + (t2 + (... + tn)).
template<typename... Args>
auto sum(Args... values) {
    return (values + ...);
}
// Non-type template parameter deduction
// Fixed-capacity buffer: BufSize is a non-type template parameter, so the
// storage is a plain in-object array of compile-time size — no heap
// allocation.
// NOTE(review): alloc()/free() are declared but not defined in this file.
template <typename T, int BufSize>
class buffer_t {
public:
T& alloc();
void free(T& item);
private:
T data[BufSize];
};
// C++17 template<auto>: the kind of the non-type parameter (int, char, ...)
// is deduced from the template argument itself.
template <auto value>
void foo() {
    std::cout << value << "\n";
}
int main () {
# default parameters of the template:
auto s = add(3, 4);
# normal
auto s2 = add<double, double>(2.1, 2.1);
std::cout << s << " " << s2 << "\n";
# Variadic templates
magic_func(); # 0
magic_func(1); # 1
magic_func(1, "Hello"); # 2
# Recursive template function
printf1(1, 2, "Good", 1.1);
# Variable parameter template expansion
printf2(1, 2, "Better", 1.1);
# Initialize list expansion
printf3(1, 2, "Best", 1.1);
# Fold expression
std::cout << "Sum: " << sum(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) << "\n";
# Non-type template parameter deduction
buffer_t<int, 100> buf; # 100 as template parameter
foo<10>();
foo<'D'>();
}
// Polymorphic base: foo may be overridden, foo2 may not (final).
struct Base {
    // Virtual destructor so deleting a derived object through a Base
    // pointer is well-defined (C++ Core Guidelines C.35).
    virtual ~Base() = default;
    virtual void foo(int);
    virtual void foo2() final;
};
struct SubClass: Base {
virtual void foo(int) override; # legal
#virtual void foo(float) override; # illegal, no virtual function in super class
#void foo2(); # illegal, foo has final
public:
SubClass() = default; # explicit let compiler use default constructor
SubClass& operator=(const SubClass&) = delete; # explicit declare refuse constructor
};
int main () {
    // Strongly typed enumeration (C++11): scoped, with an explicit
    // underlying type; enumerators do not implicitly convert to int.
    enum class new_enum : unsigned int {
        value1,
        value2,
        value3 = 100,
        value4 = 100 // enumerators may share a value
    };
    // Enumerators of the same scoped enum compare directly.
    if (new_enum::value3 == new_enum::value4) { // true
        std::cout << "new_enum::value3 == new_enum::value4" << "\n";
    }
}
// C++14: capture rvalues (move-only objects) into a lambda via init-capture
// C++14 init-capture: moves the unique_ptr into the closure, so the lambda
// owns it even though a unique_ptr cannot be copied.
void lambda_expression_capture() {
    auto important = std::make_unique<int>(1);
    auto plus = [v1 = 1, v2 = std::move(important)](int x, int y) -> int {
        return x + y + v1 + (*v2);
    };
    std::cout << plus(3, 4) << std::endl;
}
// Generic lambda
// Lambda expressions can use auto parameters
// Generic lambda (C++14): auto parameters make operator() a template, so
// one closure works for any addable argument types.
void lambda_generic() {
    auto generic = [](auto a, auto b) {
        return a + b;
    };
    std::cout << generic(1, 2) << std::endl;
    std::cout << generic(1.1, 2.2) << std::endl;
}
int main () {
    int value = 1;
    // Capture by reference: the lambda reads the variable's value at call
    // time, not at capture time.
    auto copy_value = [&value] {
        return value;
    };
    value = 100;
    auto stored_value = copy_value();
    // Prints 100 because the capture is a reference.
    std::cout << "stored_value = " << stored_value << std::endl;
}
// std::function
// C++11 std::function is a generic, polymorphic function wrapper whose
// instances can store, copy, and invoke any callable target — a type-safe
// wrapper around anything callable in C++.
// Identity function used below to demonstrate std::function wrapping.
int foo(int para) {
    return para;
}
// std::bind and std::placeholders
// std::bind binds some of a function's parameters ahead of time. It solves
// the problem that we may not have all of a function's arguments available
// at once: part of the arguments can be bound in advance, producing a new
// callable object, and the rest are supplied when the call is finally made.
// Three-argument sum used below to demonstrate std::bind.
int foo2(int a, int b, int c) {
    return a + b + c;
}
int main () {
    // std::function wraps any callable taking an int and returning an int.
    std::function<int(int)> func = foo;
    int important = 10;
    std::function<int(int)> func2 = [&](int value) -> int {
        return 1 + value + important;
    };
    std::cout << func(10) << std::endl;
    std::cout << func2(10) << std::endl;
    // std::bind fixes parameters 2 and 3 of foo2 to 1 and 2;
    // std::placeholders::_1 leaves the first parameter open.
    auto bindFoo = std::bind(foo2, std::placeholders::_1, 1, 2);
    // Calling bindFoo needs only the remaining parameter.
    std::cout << bindFoo(1) << "\n";
}
// prvalue (pure rvalue): either a literal such as 10 or true, or the result
// of an evaluation equivalent to a literal or an anonymous temporary object,
// for example 1 + 2. Temporaries returned by value, temporaries produced by
// operator expressions, literals, and lambda expressions are all prvalues.
// xvalue (expiring value): a concept introduced in C++11 together with
// rvalue references (in traditional C++, prvalue and rvalue were the same
// concept) — a value that is about to be destroyed but can be moved from.
// To obtain an xvalue, use an rvalue-reference declaration: T&&, where T is
// a type. Binding a temporary to an rvalue reference extends its lifetime:
// the temporary lives as long as the reference variable does.
// C++11 provides std::move to unconditionally convert an lvalue expression
// to an rvalue, making it easy to obtain a movable temporary.
// Overload selected when the argument is an lvalue.
void reference(int&) {
    std::cout << "lvalue reference" << std::endl;
}
// Overload selected when the argument is an rvalue.
void reference(int&&) {
    std::cout << "rvalue reference" << std::endl;
}
// val is a forwarding reference: lvalue arguments deduce T as int&, rvalue
// arguments deduce T as int. Inside the function val itself is a named
// variable and therefore an lvalue — which is why plain passing always
// selects the lvalue overload. std::forward<T> (equivalently
// static_cast<T&&>) restores the caller's value category, while std::move
// always yields an rvalue.
template <typename T>
void pass(T&& val) {
    std::cout << " normal param passing: ";
    reference(val);
    std::cout << " std::move param passing: ";
    reference(std::move(val));
    std::cout << " std::forward param passing: ";
    reference(std::forward<T>(val));
    std::cout << "static_cast<T&&> param passing: ";
    reference(static_cast<T&&>(val));
}
int main () {
    std::cout << "rvalue pass:" << std::endl;
    pass(1); // literal: T deduces to int, the argument is an rvalue
    std::cout << "lvalue pass:" << std::endl;
    int l = 1;
    pass(l); // named variable: T deduces to int&
}
int main () {
    // std::array: fixed-size, stack-allocated, STL-compatible container.
    std::array<int, 4> arr = {1, 2, 3, 4};
    arr.empty(); // check whether the container is empty (result discarded: demo only)
    arr.size();  // number of elements (result discarded: demo only)
    // Iterator support works like any other container.
    for (auto &i : arr) {
        // ...
    }
    // A lambda can serve as the comparator: this one sorts descending.
    std::sort(arr.begin(), arr.end(), [](int a, int b) {
        return b < a;
    });
    // The array size must be a constant expression.
    constexpr int len = 4;
    std::array<int, len> arr2 = {1, 2, 3, 4};
    // Interfacing with C-style APIs that expect a raw pointer:
    int* p = &arr[0];
    int* pp = arr.data();
    // std::tuple (std::pair can hold only two values).
    // std::make_tuple constructs a tuple, std::get reads one position,
    // std::tie unpacks into existing variables.
    auto student = std::make_tuple(3.8, 'A', "John");
    std::cout << "ID: 0, "
              << "GPA: " << std::get<0>(student) << ", "
              << "Grade: " << std::get<1>(student) << ", "
              << "Name: " << std::get<2>(student) << '\n';
    double gpa;
    char grade;
    std::string name;
    // Unpack a tuple into the three variables declared above.
    std::tie(gpa, grade, name) = std::make_tuple(2.8, 'B', "Balazs");
    std::cout << "ID: 1, "
              << "GPA: " << gpa << ", "
              << "Grade: " << grade << ", "
              << "Name: " << name << '\n';
    // Two tuples can be merged with std::tuple_cat.
    std::tuple<std::string, double, double, int> t("123", 4.5, 6.7, 8);
    auto new_tuple = std::tuple_cat(std::make_tuple(2.8, 'B', "Balazs"), std::move(t));
}
// The basic idea of reference counting: count the owners of a dynamically
// allocated object. Each new reference to the object increments the count,
// each removed reference decrements it, and when the count of an object
// drops to zero the pointed-to heap memory is deleted automatically.
// std::shared_ptr
// Increments the pointed-to int. The shared_ptr is passed by value, so the
// reference count is raised for the duration of the call.
void foo(std::shared_ptr<int> i) {
    ++*i;
}
// unique_ptr
// Traced type: prints on construction, destruction and foo() so the
// smart-pointer examples below can show object lifetime.
struct Foo {
Foo() { std::cout << "Foo::Foo" << std::endl; }
~Foo() { std::cout << "Foo::~Foo" << std::endl; }
void foo() { std::cout << "Foo::foo" << std::endl; }
};
// Takes Foo by const reference: usable with a dereferenced unique_ptr.
void f(const Foo &) {
std::cout << "f(const Foo&)" << std::endl;
}
int main () {
# std::shared_ptr
# std::shared_ptr is a smart pointer that records how many shared_ptr points to an object, eliminating the
# display call delete, which automatically deletes the object when the reference count becomes zero.
# But not enough, because using std::shared_ptr still needs to be called with new, which makes the code a
# certain degree of asymmetry.
# std::make_shared can be used to eliminate the explicit use of new, so std::make_shared will allocate the
# objects in the generated parameters. And return the std::shared_ptr pointer of this object type.
{
auto pointer = std::make_shared<int>(10);
foo(pointer);
std::cout << *pointer << std::endl; // 11
# The shared_ptr will be destructed before leaving the scope
}
# std::shared_ptr can get the raw pointer through the get() method and reduce the reference count by reset().
# And see the reference count of an object by use_count()
{
auto pointer = std::make_shared<int>(10);
auto pointer2 = pointer; # reference count+1
auto pointer3 = pointer; # reference count+1
int *p = pointer.get(); # no increase of reference count
std::cout << "pointer.use_count() = " << pointer.use_count() << std::endl; # 3
std::cout << "pointer2.use_count() = " << pointer2.use_count() << std::endl; # 3
std::cout << "pointer3.use_count() = " << pointer3.use_count() << std::endl; # 3
pointer2.reset();
std::cout << "reset pointer2:" << std::endl;
std::cout << "pointer.use_count() = " << pointer.use_count() << std::endl; # 2
std::cout << "pointer2.use_count() = " << pointer2.use_count() << std::endl; # 0, pointer2 has reset
std::cout << "pointer3.use_count() = " << pointer3.use_count() << std::endl; # 2
pointer3.reset();
std::cout << "reset pointer3:" << std::endl;
std::cout << "pointer.use_count() = " << pointer.use_count() << std::endl; # 1
std::cout << "pointer2.use_count() = " << pointer2.use_count() << std::endl; # 0
std::cout << "pointer3.use_count() = " << pointer3.use_count() << std::endl; # 0, pointer3 has reset
}
# std::unique_ptr
# std::unique_ptr is an exclusive smart pointer that prohibits other smart pointers from sharing the same
# object, thus keeping the code safe:
std::unique_ptr<int> pointer = std::make_unique<int>(10); # make_unique was introduced in C++14
# std::unique_ptr<int> pointer2 = pointer; // illegal
# Since it is monopolized, in other words, it cannot be copied. However, we can use std::move to transfer
# it to other unique_ptr
{
std::unique_ptr<Foo> p1(std::make_unique<Foo>());
# p1 is not empty, prints
if (p1)
p1->foo();
{
std::unique_ptr<Foo> p2(std::move(p1));
# p2 is not empty, prints
f(*p2);
# p2 is not empty, prints
if(p2)
p2->foo();
# p1 is empty, no prints
if(p1)
p1->foo();
p1 = std::move(p2);
# p2 is empty, no prints
if(p2)
p2->foo();
std::cout << "p2 was destroied" << std::endl;
}
# p1 is not empty, prints
if (p1)
p1->foo();
# Foo instance will be destroied when leaving the scope
}
# std::weak_ptr
# it there is a cycle reference shared pointers cannot be released so we can solve this by using weak Pointers
# The solution to this problem is to use the weak reference pointer std::weak_ptr, which is a weak reference
# (compared to std::shared_ptr is a strong reference). A weak reference does not cause an increase in the
# reference count
# std::weak_ptr has no * operator and -> operator, so it can't operate on resources. Its only function is to
# check if std::shared_ptr exists, its expired() The method can return true when the resource is not released,
# otherwise it returns false.
}
int main () {
    std::string fnames[] = {"foo.txt", "bar.txt", "test", "a0.txt", "AAA.txt"};
    // `\` is an escape character inside C++ string literals, so to pass the
    // regular expression `\.` it must be escaped a second time: `\\.`.
    std::regex txt_regex("[a-z]+\\.txt");
    for (const auto &fname : fnames)
        std::cout << fname << ": " << std::regex_match(fname, txt_regex) << std::endl;
    // The three-argument form fills a std::smatch — which is really
    // std::match_results<std::string::const_iterator> — with the capture
    // groups of a successful match.
    std::regex base_regex("([a-z]+)\\.txt");
    std::smatch base_match;
    for (const auto &fname : fnames) {
        if (std::regex_match(fname, base_match, base_regex)) {
            // Element 0 is the entire match; element 1 is the first
            // parenthesized sub-expression.
            if (base_match.size() == 2) {
                std::string base = base_match[1].str();
                std::cout << "sub-match[0]: " << base_match[0].str() << std::endl;
                std::cout << fname << " sub-match[1]: " << base << std::endl;
            }
        }
    }
}
// Shared state mutated by the threads below.
int v = 1;

// Writes change_v into the shared variable v under a mutex.
// std::lock_guard is RAII: the mutex is released when `lock` goes out of
// scope, even if an exception is thrown.
void critical_section(int change_v) {
    static std::mutex mtx;
    std::lock_guard<std::mutex> lock(mtx);
    // The contended work happens here.
    v = change_v;
    // mtx is released after leaving the scope.
}
// Like critical_section, but uses std::unique_lock, which allows explicit
// unlock()/lock() calls to shrink the locked region and raise concurrency.
void critical_section2(int change_v) {
    static std::mutex mtx;
    std::unique_lock<std::mutex> lock(mtx);
    // First contended operation.
    v = change_v;
    std::cout << v << std::endl;
    // Release the lock early...
    lock.unlock();
    // ...during this period other threads are allowed to acquire v...
    // ...then lock again for a second group of contended operations.
    lock.lock();
    v += 1;
    std::cout << v << std::endl;
}
// Shared atomic counters used by the threading examples in main() below.
std::atomic<int> count = {0};
std::atomic<int> counter = {0};
// Plain aggregate used to show that std::atomic<A> may not be lock-free
// for arbitrary trivially copyable types.
struct A {
float x;
int y;
long long z;
};
int main () {
# Basic of Parallelism
# std::thread is used to create an execution thread instance, so it is the basis for all concurrent
# programming. It needs to include the <thread> header file when using it. It provides a number of basic
# thread operations, such as get_id() to get the thread ID of the thread being created, use join() to join
# a thread
std::thread t([](){
std::cout << "hello world." << std::endl;
});
t.join();
# Mutex and Critical Section
# std::mutex is the most basic mutex class in C++11, and you can create a mutex by instantiating std::mutex.
# It can be locked by its member function lock(), and unlock() can be unlocked. But in the process of actually
# writing the code, it is best not to directly call the member function, Because calling member functions,
# you need to call unlock() at the exit of each critical section, and of course, exceptions. At this time,
# C++11 also provides a template class std::lock_gurad for the RAII syntax for the mutex.
# RAII guarantees the exceptional security of the code while losing the simplicity of the code.
std::thread t1(critical_section, 2), t2(critical_section, 3);
t1.join();
t2.join();
# v is global variable now
std::cout << v << std::endl;
# Because C++ guarantees that all stack objects will be destroyed at the end of the declaration period, such
# code is also extremely safe. Whether critical_section() returns normally or if an exception is thrown in the
# middle, a stack rollback is thrown, and unlock() is automatically called.
# And std::unique_lock is more flexible than std::lock_guard, std::unique_lock is more flexible. Objects of
# std::unique_lock manage the locking and unlocking operations on the mutex object with exclusive ownership
# (no other unique_lock objects owning the ownership of a mutex object). So in concurrent programming, it is
# recommended to use std::unique_lock.
# std::lock_guard cannot explicitly call lock and unlock, and std::unique_lock can be called anywhere after
# the declaration. It can reduce the scope of the lock and provide higher concurrency.
# If you use the condition variable std::condition_variable::wait you must use std::unique_lock as a parameter.
std::thread t3(critical_section2, 2), t4(critical_section2, 3);
t3.join();
t4.join();
# Future
# std::future, which provides a way to access the results of asynchronous operations.
# Imagine if our main thread A wants to open a new thread B to perform some of our expected tasks and return
# me a result. At this time, thread A may be busy with other things, and have no time to take into account
# the results of B. So we naturally hope to get the result of thread B at a certain time.
# Before the introduction of std::future in C++11, the usual practice is: Create a thread A, start task B in
# thread A, send an event when it is ready, and save the result in a global variable. The main function thread
# A is doing other things. When the result is needed, a thread is called to wait for the function to get the
# result of the execution.
# The std::future provided by C++11 simplifies this process and can be used to get the results of asynchronous
# tasks. Naturally, we can easily imagine it as a simple means of thread synchronization, namely the barrier.
# pack a lambda expression that returns 7 into a std::packaged_task
std::packaged_task<int()> task([](){return 7;});
# get the future of task
std::future<int> result = task.get_future(); # run task in a thread
std::thread(std::move(task)).detach();
std::cout << "waiting...";
result.wait(); // block until future has arrived
# output result
std::cout << "done!" << std:: endl << "future result is " << result.get() << std::endl;
# Condition Variable
# The condition variable std::condition_variable was born to solve the deadlock and was introduced when the
# mutex operation was not enough. For example, a thread may need to wait for a condition to be true
# to continue execution. A dead wait loop can cause all other threads to fail to enter the critical section
# so that when the condition is true, a deadlock occurs. Therefore, the condition_variable instance is created
# primarily to wake up the waiting thread and avoid deadlocks. notd_one() of std::condition_variable is used
# to wake up a thread; notify_all() is to notify all threads.
std::queue<int> produced_nums;
std::mutex mtx;
std::condition_variable cv;
bool notified = false; # notification sign
auto producer = [&]() {
for (int i = 0; ; i++) {
std::this_thread::sleep_for(std::chrono::milliseconds(500));
std::unique_lock<std::mutex> lock(mtx);
std::cout << "producing " << i << std::endl;
produced_nums.push(i);
notified = true;
cv.notify_all();
}
};
auto consumer = [&]() {
while (true) {
std::unique_lock<std::mutex> lock(mtx);
while (!notified) { # avoid spurious wakeup
cv.wait(lock);
}
# temporal unlock to allow producer produces more rather than
# let consumer hold the lock until its consumed.
lock.unlock();
std::this_thread::sleep_for(std::chrono::milliseconds(1000)); # consumer is slower
lock.lock();
if (!produced_nums.empty()) {
std::cout << "consuming " << produced_nums.front() << std::endl;
produced_nums.pop();
}
notified = false;
}
};
std::thread p(producer);
std::thread cs[2];
for (int i = 0; i < 2; ++i) {
cs[i] = std::thread(consumer);
}
p.join();
for (int i = 0; i < 2; ++i) {
cs[i].join();
}
# Atomic Operation and Memory Model
# std::mutex can solve the problem of concurrent read and write, but the mutex is an operating system level
# function. This is because the implementation of a mutex usually contains two basic principles:
# Provide automatic state transition between threads, that is, "lock" state
# Ensure that the memory of the manipulated variable is isolated from the critical section during the mutex
# operation
# This is a very strong set of synchronization conditions, in other words, when it is finally compiled into a
# CPU instruction, it will behave as a lot of instructions (we will look at how to implement a simple mutex
# later). This seems too harsh for a variable that requires only atomic operations (no intermediate state).
# The research on synchronization conditions has a very long history, and we will not go into details here.
# Readers should understand that under the modern CPU architecture, atomic operations at the CPU instruction
# level are provided. Therefore, in the C + + 11 multi-threaded shared variable reading and writing, the
# introduction of the std::atomic template, so that we instantiate an atomic type, will be a Atomic type read
# and write operations are minimized from a set of instructions to a single CPU instruction. E.g:
# And provides basic numeric member functions for atomic types of integers or floating-point numbers, for
# example, Including fetch_add, fetch_sub, etc., and the corresponding +, - version is provided by overload.
{
# count is a global variable
std::thread t1([](){
count.fetch_add(1);
});
std::thread t2([](){
count++; # identical to fetch_add
count += 1; # identical to fetch_add
});
t1.join();
t2.join();
std::cout << count << std::endl;
}
std::atomic<A> a;
std::cout << std::boolalpha << a.is_lock_free() << std::endl;
# Memory Orders
# In order to achieve the ultimate performance and achieve consistency of various strength requirements,
# C++11 defines six different memory sequences for atomic operations. The option std::memory_order expresses
# four synchronization models between multiple threads
# Relaxed model:
# Under this model, atomic operations within a single thread are executed sequentially, and instruction
# reordering is not allowed, but the order of atomic operations between different threads is arbitrary. The
# type is specified by std::memory_order_relaxed
{
std::vector<std::thread> vt;
for (int i = 0; i < 100; ++i) {
vt.emplace_back([](){
counter.fetch_add(1, std::memory_order_relaxed);
});
}
for (auto& t : vt) {
t.join();
}
std::cout << "current counter:" << counter << std::endl;
}
# Release/consumption model:
# In this model, we begin to limit the order of operations between processes. If a thread needs to modify a
# value, but another thread will have a dependency on that operation of the value, that is, the latter depends.
# former. Specifically, thread A has completed three writes to x, and thread B relies only on the third x write
# operation, regardless of the first two write behaviors of x, then A When active x.release() (ie using
# std::memory_order_release), the option std::memory_order_consume ensures that B observes A when calling
# x.load() Three writes to x.
{
std::atomic<int*> ptr;
int v;
std::thread producer([&]() {
int* p = new int(42);
v = 1024;
ptr.store(p, std::memory_order_release);
});
std::thread consumer([&]() {
int* p;
while(!(p = ptr.load(std::memory_order_consume)));
std::cout << "p: " << *p << std::endl;
std::cout << "v: " << v << std::endl;
});
producer.join();
consumer.join();
}
# Release/Acquire model:
# Under this model, we can further tighten the order of atomic operations between different threads,
# specifying the timing between releasing std::memory_order_release and getting std::memory_order_acquire.
# All write operations before the release operation are visible to any other thread, ie, happens-before.
# As you can see, std::memory_order_release ensures that the write behavior after it does not occur before
# the release operation, is a backward barrier, and std::memory_order_acquire ensures the previous write
# behavior after it, no It will happen after the get operation, it is a forward barrier. For the option
# std::memory_order_acq_rel, it combines the characteristics of the two, and only determines a memory
# barrier, so that the current thread reads and writes to the memory. Will not be rearranged before and after
# this operation.
{
std::vector<int> v;
std::atomic<int> flag = {0};
std::thread release([&]() {
v.push_back(42);
flag.store(1, std::memory_order_release);
});
std::thread acqrel([&]() {
int expected = 1; # must before compare_exchange_strong
while(!flag.compare_exchange_strong(expected, 2, std::memory_order_acq_rel)) {
expected = 1; # must after compare_exchange_strong
}
# flag has changed to 2
});
std::thread acquire([&]() {
while(flag.load(std::memory_order_acquire) < 2);
std::cout << v.at(0) << std::endl; # must be 42
});
release.join();
acqrel.join();
acquire.join();
}
# In this case we used compare_exchange_strong, which is the Compare-and-swap primitive, which has a weaker
# version, compare_exchange_weak, which allows a failure to be returned even if the exchange is successful.
# The reason is due to a false failure on some platforms, specifically, when the CPU performs a context
# switch, another thread loads the same address to produce an inconsistency. In addition, the performance
# of compare_exchange_strong may be slightly worse than compare_exchange_weak, but in most cases,
# compare_exchange_strong should be limited.
# Sequential Consistent Model:
# Under this model, atomic operations satisfy sequence consistency, which in turn can cause performance loss.
# It can be specified explicitly by std::memory_order_seq_cst
# This example is essentially the same as the first loose model example. Just change the memory order of the
# atomic operation to memory_order_seq_cst
{
std::vector<std::thread> vt;
for (int i = 0; i < 100; ++i) {
vt.emplace_back([](){
counter.fetch_add(1, std::memory_order_seq_cst);
});
}
for (auto& t : vt) {
t.join();
}
std::cout << "current counter:" << counter << std::endl;
}
}