// Compile-time factorial. A C++11 constexpr function had to be a single
// return statement; compiled as C++14 or later, an if statement is also fine.
constexpr int factorial(int n) {
    if (n <= 1) {
        return 1;
    }
    return n * factorial(n - 1);
}
// Loops and local variables are allowed in constexpr functions since C++14.
// Demonstrates C++14 relaxed constexpr: loops and mutable local variables.
// Returns i unchanged when i <= 1, otherwise the sum 0 + 1 + ... + (i - 1).
constexpr int loop(int i) {
    if (i <= 1) {
        return i;
    }
    int total = 0;
    for (int k = 1; k < i; ++k) { // k = 0 would only add zero, so start at 1
        total += k;
    }
    return total;
}
int main() {
    // Both values are computed entirely at compile time.
    constexpr auto fact5 = factorial(5);
    constexpr auto total = loop(10);
    // A constexpr value is a valid constant expression for an array bound.
    int array[fact5];
    std::cout << fact5 << "\n";
    std::cout << total << "\n";
}
int main() {
    std::vector<int> vec = {1, 2, 3, 4};
    // C++17 if-with-initializer: itr's scope is limited to the if statement.
    if (const std::vector<int>::iterator itr = std::find(vec.begin(), vec.end(), 3); itr != vec.end()) {
        *itr = 4;
    }
    // Since C++17 this can be simplified by using `auto`.
    if (auto itr = std::find(vec.begin(), vec.end(), 2); itr != vec.end()) {
        std::cout << *itr << "\n";
        *itr = 4;
    }
}
// Returns three heterogeneous values bundled in one tuple; callers can
// unpack them with structured bindings or std::tie.
std::tuple<int, double, std::string> f() {
    return {1, 2.3, "Hello"};
}
int main() {
    // C++17 structured bindings unpack the returned tuple in one declaration.
    const auto [x, y, z] = f();
    std::cout << x << ", " << y << ", " << z << "\n";
}
// Pre-C++11 style: the result type R must be supplied explicitly by the
// caller, because it cannot be deduced from the arguments alone.
template<typename R, typename T, typename U>
R add(T lhs, U rhs) {
    return lhs + rhs;
}
// C++11
// C++11: auto with a trailing return type; decltype(a + b) names the type
// of the mixed-type sum, so nothing has to be supplied by the caller.
template<typename T, typename U>
auto add2(T a, U b) -> decltype(a + b) {
    return a + b;
}
// C++14
// C++14: the return type is deduced directly from the return statement,
// no trailing return type needed.
template<typename T, typename U>
auto add3(T a, U b) {
    return a + b;
}
int main () {
    // Before C++11 the iterator type had to be spelled out in full.
    std::vector<int> vec = {1, 2, 3, 4};
    std::vector<int>::const_iterator it = vec.cbegin();
    // With auto the compiler deduces the very same type.
    auto it2 = vec.cbegin();
    auto i = 5;              // i deduced as int
    auto arr = new auto(10); // arr deduced as int*
    // decltype yields the declared type of an expression.
    auto x = 1;
    auto y = 2;
    decltype(x + y) z; // z has the type of x + y, i.e. int (value never read)
    if (std::is_same<decltype(x), int>::value)
        std::cout << "type x == int" << std::endl;
    if (std::is_same<decltype(x), float>::value)
        std::cout << "type x == float" << std::endl;
    if (std::is_same<decltype(x), decltype(z)>::value)
        std::cout << "type z == type x" << std::endl;
    // Tail type inference: explicit template arguments are still allowed.
    auto q = add3<double, int>(1.0, 2);
    std::cout << "q: " << q << "\n";
    delete arr; // release the demo allocation (was leaked before)
}
// Returns t + 1 for integral types and t + 0.001 otherwise. Because
// if constexpr (C++17) discards the untaken branch at compile time, the
// deduced return type can legally differ per instantiation.
template<typename T>
auto print_type_info(const T& t) {
    if constexpr (std::is_integral_v<T>) {
        return t + 1;
    } else {
        return t + 0.001;
    }
}
int main() {
# if constexpr
std::cout << print_type_info(5) << "\n";
std::cout << print_type_info(3.14) << "\n";
# ranged based for loop
std::vector<int> vec = {1, 2, 3, 4};
if (auto itr = std::find(vec.begin(), vec.end(), 3); itr != vec.end())
*itr = 4;
for (auto element : vec)
std::cout << element << "\n"; # read only
for (auto &element : vec) {
element += 1; # writeable
}
}
// Default parameters of the template:
// Default template arguments (C++11): both T and U fall back to int when
// they are neither deduced nor provided explicitly.
template<typename T = int, typename U = int>
auto add(T a, U b) -> decltype(a + b) {
    return a + b;
}
// Variadic templates (C++11): Magic accepts any number of type parameters,
// including zero.
template<typename... Ts> class Magic;
// Magic2 additionally requires at least one explicit leading parameter.
template<typename Require, typename... Args> class Magic2;
// Prints the number of arguments; sizeof...(Ts) counts the pack's elements.
template<typename... Ts>
void magic_func(Ts... args) {
    std::cout << sizeof...(Ts) << std::endl;
}
// Recursive template function
// Recursive variadic printing (C++11 style). The single-argument overload
// terminates the recursion.
template<typename T0>
void printf1(T0 value) {
    std::cout << value << std::endl;
}
// Peels one argument off the pack per call until only one remains, at which
// point the overload above is selected.
template<typename T, typename... Ts>
void printf1(T head, Ts... tail) {
    std::cout << head << "\n";
    printf1(tail...);
}
// Variadic parameter pack expansion
// C++17 variant: a single function that stops recursing via if constexpr
// when the pack is empty — no terminating overload needed.
template<typename T0, typename... T>
void printf2(T0 head, T... tail) {
    std::cout << head << "\n";
    if constexpr (sizeof...(tail) > 0) {
        printf2(tail...);
    }
}
// Initializer-list expansion
// Prints `value`, then each element of the pack, by expanding the pack
// inside a braced initializer list. Each list element is the comma
// expression `(print-one-arg-lambda(), value)`, so the lambdas run left to
// right while the list is built; the comma operator makes every element
// evaluate to `value`, giving the list a consistent element type T. The
// list itself is cast to void because only the side effects are wanted.
// NOTE(review): every printed pack element must be streamable; the list's
// element type is T (the first argument's type).
template<typename T, typename... Ts>
auto printf3(T value, Ts... args) {
std::cout << value << "\n";
(void) std::initializer_list<T>{ ([&args] {
std::cout << args << "\n";
}(), value)...};
}
// Fold expression
// C++17 unary right fold over operator+: expands to
// t1 + (t2 + (... + tn)).
template<typename... Args>
auto sum(Args... values) {
    return (values + ...);
}
// Non-type template parameter deduction
// Fixed-capacity buffer: BufSize is a non-type template parameter, so the
// storage is a plain in-object array of compile-time size — no heap
// allocation.
// NOTE(review): alloc()/free() are declared but not defined in this file.
template <typename T, int BufSize>
class buffer_t {
public:
T& alloc();
void free(T& item);
private:
T data[BufSize];
};
// C++17 template<auto>: the kind of the non-type parameter (int, char, ...)
// is deduced from the template argument itself.
template <auto value>
void foo() {
    std::cout << value << "\n";
}
int main () {
# default parameters of the template:
auto s = add(3, 4);
# normal
auto s2 = add<double, double>(2.1, 2.1);
std::cout << s << " " << s2 << "\n";
# Variadic templates
magic_func(); # 0
magic_func(1); # 1
magic_func(1, "Hello"); # 2
# Recursive template function
printf1(1, 2, "Good", 1.1);
# Variable parameter template expansion
printf2(1, 2, "Better", 1.1);
# Initialize list expansion
printf3(1, 2, "Best", 1.1);
# Fold expression
std::cout << "Sum: " << sum(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) << "\n";
# Non-type template parameter deduction
buffer_t<int, 100> buf; # 100 as template parameter
foo<10>();
foo<'D'>();
}
// Polymorphic base: foo may be overridden, foo2 may not (final).
struct Base {
    // Virtual destructor so deleting a derived object through a Base
    // pointer is well-defined (C++ Core Guidelines C.35).
    virtual ~Base() = default;
    virtual void foo(int);
    virtual void foo2() final;
};
struct SubClass: Base {
virtual void foo(int) override; # legal
#virtual void foo(float) override; # illegal, no virtual function in super class
#void foo2(); # illegal, foo has final
public:
SubClass() = default; # explicit let compiler use default constructor
SubClass& operator=(const SubClass&) = delete; # explicit declare refuse constructor
};
int main () {
    // Strongly typed enumeration (C++11): scoped, with an explicit
    // underlying type; enumerators do not implicitly convert to int.
    enum class new_enum : unsigned int {
        value1,
        value2,
        value3 = 100,
        value4 = 100 // enumerators may share a value
    };
    // Enumerators of the same scoped enum compare directly.
    if (new_enum::value3 == new_enum::value4) { // true
        std::cout << "new_enum::value3 == new_enum::value4" << "\n";
    }
}
// C++14: capture rvalues (move-only objects) into a lambda via init-capture
// C++14 init-capture: moves the unique_ptr into the closure, so the lambda
// owns it even though a unique_ptr cannot be copied.
void lambda_expression_capture() {
    auto important = std::make_unique<int>(1);
    auto plus = [v1 = 1, v2 = std::move(important)](int x, int y) -> int {
        return x + y + v1 + (*v2);
    };
    std::cout << plus(3, 4) << std::endl;
}
// Generic lambda
// Lambda expressions can use auto parameters
// Generic lambda (C++14): auto parameters make operator() a template, so
// one closure works for any addable argument types.
void lambda_generic() {
    auto generic = [](auto a, auto b) {
        return a + b;
    };
    std::cout << generic(1, 2) << std::endl;
    std::cout << generic(1.1, 2.2) << std::endl;
}
int main () {
    int value = 1;
    // Capture by reference: the lambda reads the variable's value at call
    // time, not at capture time.
    auto copy_value = [&value] {
        return value;
    };
    value = 100;
    auto stored_value = copy_value();
    // Prints 100 because the capture is a reference.
    std::cout << "stored_value = " << stored_value << std::endl;
}
// std::function
// C++11 std::function is a generic, polymorphic function wrapper whose
// instances can store, copy, and invoke any callable target — a type-safe
// wrapper around anything callable in C++.
// Identity function used below to demonstrate std::function wrapping.
int foo(int para) {
    return para;
}
// std::bind and std::placeholders
// std::bind binds some of a function's parameters ahead of time. It solves
// the problem that we may not have all of a function's arguments available
// at once: part of the arguments can be bound in advance, producing a new
// callable object, and the rest are supplied when the call is finally made.
// Three-argument sum used below to demonstrate std::bind.
int foo2(int a, int b, int c) {
    return a + b + c;
}
int main () {
    // std::function wraps any callable taking an int and returning an int.
    std::function<int(int)> func = foo;
    int important = 10;
    std::function<int(int)> func2 = [&](int value) -> int {
        return 1 + value + important;
    };
    std::cout << func(10) << std::endl;
    std::cout << func2(10) << std::endl;
    // std::bind fixes parameters 2 and 3 of foo2 to 1 and 2;
    // std::placeholders::_1 leaves the first parameter open.
    auto bindFoo = std::bind(foo2, std::placeholders::_1, 1, 2);
    // Calling bindFoo needs only the remaining parameter.
    std::cout << bindFoo(1) << "\n";
}
// prvalue (pure rvalue): either a literal such as 10 or true, or the result
// of an evaluation equivalent to a literal or an anonymous temporary object,
// for example 1 + 2. Temporaries returned by value, temporaries produced by
// operator expressions, literals, and lambda expressions are all prvalues.
// xvalue (expiring value): a concept introduced in C++11 together with
// rvalue references (in traditional C++, prvalue and rvalue were the same
// concept) — a value that is about to be destroyed but can be moved from.
// To obtain an xvalue, use an rvalue-reference declaration: T&&, where T is
// a type. Binding a temporary to an rvalue reference extends its lifetime:
// the temporary lives as long as the reference variable does.
// C++11 provides std::move to unconditionally convert an lvalue expression
// to an rvalue, making it easy to obtain a movable temporary.
// Overload selected when the argument is an lvalue.
void reference(int&) {
    std::cout << "lvalue reference" << std::endl;
}
// Overload selected when the argument is an rvalue.
void reference(int&&) {
    std::cout << "rvalue reference" << std::endl;
}
// val is a forwarding reference: lvalue arguments deduce T as int&, rvalue
// arguments deduce T as int. Inside the function val itself is a named
// variable and therefore an lvalue — which is why plain passing always
// selects the lvalue overload. std::forward<T> (equivalently
// static_cast<T&&>) restores the caller's value category, while std::move
// always yields an rvalue.
template <typename T>
void pass(T&& val) {
    std::cout << " normal param passing: ";
    reference(val);
    std::cout << " std::move param passing: ";
    reference(std::move(val));
    std::cout << " std::forward param passing: ";
    reference(std::forward<T>(val));
    std::cout << "static_cast<T&&> param passing: ";
    reference(static_cast<T&&>(val));
}
int main () {
    std::cout << "rvalue pass:" << std::endl;
    pass(1); // literal: T deduces to int, the argument is an rvalue
    std::cout << "lvalue pass:" << std::endl;
    int l = 1;
    pass(l); // named variable: T deduces to int&
}
int main () {
    // std::array: fixed-size, stack-allocated, STL-compatible container.
    std::array<int, 4> arr = {1, 2, 3, 4};
    arr.empty(); // check whether the container is empty (result discarded: demo only)
    arr.size();  // number of elements (result discarded: demo only)
    // Iterator support works like any other container.
    for (auto &i : arr) {
        // ...
    }
    // A lambda can serve as the comparator: this one sorts descending.
    std::sort(arr.begin(), arr.end(), [](int a, int b) {
        return b < a;
    });
    // The array size must be a constant expression.
    constexpr int len = 4;
    std::array<int, len> arr2 = {1, 2, 3, 4};
    // Interfacing with C-style APIs that expect a raw pointer:
    int* p = &arr[0];
    int* pp = arr.data();
    // std::tuple (std::pair can hold only two values).
    // std::make_tuple constructs a tuple, std::get reads one position,
    // std::tie unpacks into existing variables.
    auto student = std::make_tuple(3.8, 'A', "John");
    std::cout << "ID: 0, "
              << "GPA: " << std::get<0>(student) << ", "
              << "Grade: " << std::get<1>(student) << ", "
              << "Name: " << std::get<2>(student) << '\n';
    double gpa;
    char grade;
    std::string name;
    // Unpack a tuple into the three variables declared above.
    std::tie(gpa, grade, name) = std::make_tuple(2.8, 'B', "Balazs");
    std::cout << "ID: 1, "
              << "GPA: " << gpa << ", "
              << "Grade: " << grade << ", "
              << "Name: " << name << '\n';
    // Two tuples can be merged with std::tuple_cat.
    std::tuple<std::string, double, double, int> t("123", 4.5, 6.7, 8);
    auto new_tuple = std::tuple_cat(std::make_tuple(2.8, 'B', "Balazs"), std::move(t));
}
// The basic idea of reference counting: count the owners of a dynamically
// allocated object. Each new reference to the object increments the count,
// each removed reference decrements it, and when the count of an object
// drops to zero the pointed-to heap memory is deleted automatically.
// std::shared_ptr
// Increments the pointed-to int. The shared_ptr is passed by value, so the
// reference count is raised for the duration of the call.
void foo(std::shared_ptr<int> i) {
    ++*i;
}
// unique_ptr
// Traced type: prints on construction, destruction and foo() so the
// smart-pointer examples below can show object lifetime.
struct Foo {
Foo() { std::cout << "Foo::Foo" << std::endl; }
~Foo() { std::cout << "Foo::~Foo" << std::endl; }
void foo() { std::cout << "Foo::foo" << std::endl; }
};
// Takes Foo by const reference: usable with a dereferenced unique_ptr.
void f(const Foo &) {
std::cout << "f(const Foo&)" << std::endl;
}
int main () {
# std::shared_ptr
# std::shared_ptr is a smart pointer that records how many shared_ptr points to an object, eliminating the
# display call delete, which automatically deletes the object when the reference count becomes zero.
# But not enough, because using std::shared_ptr still needs to be called with new, which makes the code a
# certain degree of asymmetry.
# std::make_shared can be used to eliminate the explicit use of new, so std::make_shared will allocate the
# objects in the generated parameters. And return the std::shared_ptr pointer of this object type.
{
auto pointer = std::make_shared<int>(10);
foo(pointer);
std::cout << *pointer << std::endl; // 11
# The shared_ptr will be destructed before leaving the scope
}
# std::shared_ptr can get the raw pointer through the get() method and reduce the reference count by reset().
# And see the reference count of an object by use_count()
{
auto pointer = std::make_shared<int>(10);
auto pointer2 = pointer; # reference count+1
auto pointer3 = pointer; # reference count+1
int *p = pointer.get(); # no increase of reference count
std::cout << "pointer.use_count() = " << pointer.use_count() << std::endl; # 3
std::cout << "pointer2.use_count() = " << pointer2.use_count() << std::endl; # 3
std::cout << "pointer3.use_count() = " << pointer3.use_count() << std::endl; # 3
pointer2.reset();
std::cout << "reset pointer2:" << std::endl;
std::cout << "pointer.use_count() = " << pointer.use_count() << std::endl; # 2
std::cout << "pointer2.use_count() = " << pointer2.use_count() << std::endl; # 0, pointer2 has reset
std::cout << "pointer3.use_count() = " << pointer3.use_count() << std::endl; # 2
pointer3.reset();
std::cout << "reset pointer3:" << std::endl;
std::cout << "pointer.use_count() = " << pointer.use_count() << std::endl; # 1
std::cout << "pointer2.use_count() = " << pointer2.use_count() << std::endl; # 0
std::cout << "pointer3.use_count() = " << pointer3.use_count() << std::endl; # 0, pointer3 has reset
}
# std::unique_ptr
# std::unique_ptr is an exclusive smart pointer that prohibits other smart pointers from sharing the same
# object, thus keeping the code safe:
std::unique_ptr<int> pointer = std::make_unique<int>(10); # make_unique was introduced in C++14
# std::unique_ptr<int> pointer2 = pointer; // illegal
# Since it is monopolized, in other words, it cannot be copied. However, we can use std::move to transfer
# it to other unique_ptr
{
std::unique_ptr<Foo> p1(std::make_unique<Foo>());
# p1 is not empty, prints
if (p1)
p1->foo();
{
std::unique_ptr<Foo> p2(std::move(p1));
# p2 is not empty, prints
f(*p2);
# p2 is not empty, prints
if(p2)
p2->foo();
# p1 is empty, no prints
if(p1)
p1->foo();
p1 = std::move(p2);
# p2 is empty, no prints
if(p2)
p2->foo();
std::cout << "p2 was destroied" << std::endl;
}
# p1 is not empty, prints
if (p1)
p1->foo();
# Foo instance will be destroied when leaving the scope
}
# std::weak_ptr
# it there is a cycle reference shared pointers cannot be released so we can solve this by using weak Pointers
# The solution to this problem is to use the weak reference pointer std::weak_ptr, which is a weak reference
# (compared to std::shared_ptr is a strong reference). A weak reference does not cause an increase in the
# reference count
# std::weak_ptr has no * operator and -> operator, so it can't operate on resources. Its only function is to
# check if std::shared_ptr exists, its expired() The method can return true when the resource is not released,
# otherwise it returns false.
}
int main () {
    std::string fnames[] = {"foo.txt", "bar.txt", "test", "a0.txt", "AAA.txt"};
    // `\` is an escape character inside C++ string literals, so to pass the
    // regular expression `\.` it must be escaped a second time: `\\.`.
    std::regex txt_regex("[a-z]+\\.txt");
    for (const auto &fname : fnames)
        std::cout << fname << ": " << std::regex_match(fname, txt_regex) << std::endl;
    // The three-argument form fills a std::smatch — which is really
    // std::match_results<std::string::const_iterator> — with the capture
    // groups of a successful match.
    std::regex base_regex("([a-z]+)\\.txt");
    std::smatch base_match;
    for (const auto &fname : fnames) {
        if (std::regex_match(fname, base_match, base_regex)) {
            // Element 0 is the entire match; element 1 is the first
            // parenthesized sub-expression.
            if (base_match.size() == 2) {
                std::string base = base_match[1].str();
                std::cout << "sub-match[0]: " << base_match[0].str() << std::endl;
                std::cout << fname << " sub-match[1]: " << base << std::endl;
            }
        }
    }
}
// Shared state mutated by the threads below.
int v = 1;

// Writes change_v into the shared variable v under a mutex.
// std::lock_guard is RAII: the mutex is released when `lock` goes out of
// scope, even if an exception is thrown.
void critical_section(int change_v) {
    static std::mutex mtx;
    std::lock_guard<std::mutex> lock(mtx);
    // The contended work happens here.
    v = change_v;
    // mtx is released after leaving the scope.
}
// Like critical_section, but uses std::unique_lock, which allows explicit
// unlock()/lock() calls to shrink the locked region and raise concurrency.
void critical_section2(int change_v) {
    static std::mutex mtx;
    std::unique_lock<std::mutex> lock(mtx);
    // First contended operation.
    v = change_v;
    std::cout << v << std::endl;
    // Release the lock early...
    lock.unlock();
    // ...during this period other threads are allowed to acquire v...
    // ...then lock again for a second group of contended operations.
    lock.lock();
    v += 1;
    std::cout << v << std::endl;
}
// Shared atomic counters used by the threading examples in main() below.
std::atomic<int> count = {0};
std::atomic<int> counter = {0};
// Plain aggregate used to show that std::atomic<A> may not be lock-free
// for arbitrary trivially copyable types.
struct A {
float x;
int y;
long long z;
};
int main () {
# Basic of Parallelism
# std::thread is used to create an execution thread instance, so it is the basis for all concurrent
# programming. It needs to include the <thread> header file when using it. It provides a number of basic
# thread operations, such as get_id() to get the thread ID of the thread being created, use join() to join
# a thread
std::thread t([](){
std::cout << "hello world." << std::endl;
});
t.join();
# Mutex and Critical Section
# std::mutex is the most basic mutex class in C++11, and you can create a mutex by instantiating std::mutex.
# It can be locked by its member function lock(), and unlock() can be unlocked. But in the process of actually
# writing the code, it is best not to directly call the member function, Because calling member functions,
# you need to call unlock() at the exit of each critical section, and of course, exceptions. At this time,
# C++11 also provides a template class std::lock_gurad for the RAII syntax for the mutex.
# RAII guarantees the exceptional security of the code while losing the simplicity of the code.
std::thread t1(critical_section, 2), t2(critical_section, 3);
t1.join();
t2.join();
# v is global variable now
std::cout << v << std::endl;
# Because C++ guarantees that all stack objects will be destroyed at the end of the declaration period, such
# code is also extremely safe. Whether critical_section() returns normally or if an exception is thrown in the
# middle, a stack rollback is thrown, and unlock() is automatically called.
# And std::unique_lock is more flexible than std::lock_guard, std::unique_lock is more flexible. Objects of
# std::unique_lock manage the locking and unlocking operations on the mutex object with exclusive ownership
# (no other unique_lock objects owning the ownership of a mutex object). So in concurrent programming, it is
# recommended to use std::unique_lock.
# std::lock_guard cannot explicitly call lock and unlock, and std::unique_lock can be called anywhere after
# the declaration. It can reduce the scope of the lock and provide higher concurrency.
# If you use the condition variable std::condition_variable::wait you must use std::unique_lock as a parameter.
std::thread t3(critical_section2, 2), t4(critical_section2, 3);
t3.join();
t4.join();
# Future
# std::future, which provides a way to access the results of asynchronous operations.
# Imagine if our main thread A wants to open a new thread B to perform some of our expected tasks and return
# me a result. At this time, thread A may be busy with other things, and have no time to take into account
# the results of B. So we naturally hope to get the result of thread B at a certain time.
# Before the introduction of std::future in C++11, the usual practice is: Create a thread A, start task B in
# thread A, send an event when it is ready, and save the result in a global variable. The main function thread
# A is doing other things. When the result is needed, a thread is called to wait for the function to get the
# result of the execution.
# The std::future provided by C++11 simplifies this process and can be used to get the results of asynchronous
# tasks. Naturally, we can easily imagine it as a simple means of thread synchronization, namely the barrier.
# pack a lambda expression that returns 7 into a std::packaged_task
std::packaged_task<int()> task([](){return 7;});
# get the future of task
std::future<int> result = task.get_future(); # run task in a thread
std::thread(std::move(task)).detach();
std::cout << "waiting...";
result.wait(); // block until future has arrived
# output result
std::cout << "done!" << std:: endl << "future result is " << result.get() << std::endl;
# Condition Variable
# The condition variable std::condition_variable was born to solve the deadlock and was introduced when the
# mutex operation was not enough. For example, a thread may need to wait for a condition to be true
# to continue execution. A dead wait loop can cause all other threads to fail to enter the critical section
# so that when the condition is true, a deadlock occurs. Therefore, the condition_variable instance is created
# primarily to wake up the waiting thread and avoid deadlocks. notd_one() of std::condition_variable is used
# to wake up a thread; notify_all() is to notify all threads.
std::queue<int> produced_nums;
std::mutex mtx;
std::condition_variable cv;
bool notified = false; # notification sign
auto producer = [&]() {
for (int i = 0; ; i++) {
std::this_thread::sleep_for(std::chrono::milliseconds(500));
std::unique_lock<std::mutex> lock(mtx);
std::cout << "producing " << i << std::endl;
produced_nums.push(i);
notified = true;
cv.notify_all();
}
};
auto consumer = [&]() {
while (true) {
std::unique_lock<std::mutex> lock(mtx);
while (!notified) { # avoid spurious wakeup
cv.wait(lock);
}
# temporal unlock to allow producer produces more rather than
# let consumer hold the lock until its consumed.
lock.unlock();
std::this_thread::sleep_for(std::chrono::milliseconds(1000)); # consumer is slower
lock.lock();
if (!produced_nums.empty()) {
std::cout << "consuming " << produced_nums.front() << std::endl;
produced_nums.pop();
}
notified = false;
}
};
std::thread p(producer);
std::thread cs[2];
for (int i = 0; i < 2; ++i) {
cs[i] = std::thread(consumer);
}
p.join();
for (int i = 0; i < 2; ++i) {
cs[i].join();
}
# Atomic Operation and Memory Model
# std::mutex can solve the problem of concurrent read and write, but the mutex is an operating system level
# function. This is because the implementation of a mutex usually contains two basic principles:
# Provide automatic state transition between threads, that is, "lock" state
# Ensure that the memory of the manipulated variable is isolated from the critical section during the mutex
# operation
# This is a very strong set of synchronization conditions, in other words, when it is finally compiled into a
# CPU instruction, it will behave as a lot of instructions (we will look at how to implement a simple mutex
# later). This seems too harsh for a variable that requires only atomic operations (no intermediate state).
# The research on synchronization conditions has a very long history, and we will not go into details here.
# Readers should understand that under the modern CPU architecture, atomic operations at the CPU instruction
# level are provided. Therefore, in the C + + 11 multi-threaded shared variable reading and writing, the
# introduction of the std::atomic template, so that we instantiate an atomic type, will be a Atomic type read
# and write operations are minimized from a set of instructions to a single CPU instruction. E.g:
# And provides basic numeric member functions for atomic types of integers or floating-point numbers, for
# example, Including fetch_add, fetch_sub, etc., and the corresponding +, - version is provided by overload.
{
# count is a global variable
std::thread t1([](){
count.fetch_add(1);
});
std::thread t2([](){
count++; # identical to fetch_add
count += 1; # identical to fetch_add
});
t1.join();
t2.join();
std::cout << count << std::endl;
}
std::atomic<A> a;
std::cout << std::boolalpha << a.is_lock_free() << std::endl;
# Memory Orders
# In order to achieve the ultimate performance and achieve consistency of various strength requirements,
# C++11 defines six different memory sequences for atomic operations. The option std::memory_order expresses
# four synchronization models between multiple threads
# Relaxed model:
# Under this model, atomic operations within a single thread are executed sequentially, and instruction
# reordering is not allowed, but the order of atomic operations between different threads is arbitrary. The
# type is specified by std::memory_order_relaxed
{
std::vector<std::thread> vt;
for (int i = 0; i < 100; ++i) {
vt.emplace_back([](){
counter.fetch_add(1, std::memory_order_relaxed);
});
}
for (auto& t : vt) {
t.join();
}
std::cout << "current counter:" << counter << std::endl;
}
# Release/consumption model:
# In this model, we begin to limit the order of operations between processes. If a thread needs to modify a
# value, but another thread will have a dependency on that operation of the value, that is, the latter depends.
# former. Specifically, thread A has completed three writes to x, and thread B relies only on the third x write
# operation, regardless of the first two write behaviors of x, then A When active x.release() (ie using
# std::memory_order_release), the option std::memory_order_consume ensures that B observes A when calling
# x.load() Three writes to x.
{
std::atomic<int*> ptr;
int v;
std::thread producer([&]() {
int* p = new int(42);
v = 1024;
ptr.store(p, std::memory_order_release);
});
std::thread consumer([&]() {
int* p;
while(!(p = ptr.load(std::memory_order_consume)));
std::cout << "p: " << *p << std::endl;
std::cout << "v: " << v << std::endl;
});
producer.join();
consumer.join();
}
# Release/Acquire model:
# Under this model, we can further tighten the order of atomic operations between different threads,
# specifying the timing between releasing std::memory_order_release and getting std::memory_order_acquire.
# All write operations before the release operation are visible to any other thread, ie, happens-before.
# As you can see, std::memory_order_release ensures that the write behavior after it does not occur before
# the release operation, is a backward barrier, and std::memory_order_acquire ensures the previous write
# behavior after it, no It will happen after the get operation, it is a forward barrier. For the option
# std::memory_order_acq_rel, it combines the characteristics of the two, and only determines a memory
# barrier, so that the current thread reads and writes to the memory. Will not be rearranged before and after
# this operation.
{
std::vector<int> v;
std::atomic<int> flag = {0};
std::thread release([&]() {
v.push_back(42);
flag.store(1, std::memory_order_release);
});
std::thread acqrel([&]() {
int expected = 1; # must before compare_exchange_strong
while(!flag.compare_exchange_strong(expected, 2, std::memory_order_acq_rel)) {
expected = 1; # must after compare_exchange_strong
}
# flag has changed to 2
});
std::thread acquire([&]() {
while(flag.load(std::memory_order_acquire) < 2);
std::cout << v.at(0) << std::endl; # must be 42
});
release.join();
acqrel.join();
acquire.join();
}
# In this case we used compare_exchange_strong, which is the Compare-and-swap primitive, which has a weaker
# version, compare_exchange_weak, which allows a failure to be returned even if the exchange is successful.
# The reason is due to a false failure on some platforms, specifically, when the CPU performs a context
# switch, another thread loads the same address to produce an inconsistency. In addition, the performance
# of compare_exchange_strong may be slightly worse than compare_exchange_weak, but in most cases,
# compare_exchange_strong should be limited.
# Sequential Consistent Model:
# Under this model, atomic operations satisfy sequence consistency, which in turn can cause performance loss.
# It can be specified explicitly by std::memory_order_seq_cst
# This example is essentially the same as the first loose model example. Just change the memory order of the
# atomic operation to memory_order_seq_cst
{
std::vector<std::thread> vt;
for (int i = 0; i < 100; ++i) {
vt.emplace_back([](){
counter.fetch_add(1, std::memory_order_seq_cst);
});
}
for (auto& t : vt) {
t.join();
}
std::cout << "current counter:" << counter << std::endl;
}
}