
Reinforcement Learning finale (Part 7)

The final part of the series is here. I hope you have enjoyed it as much as I have enjoyed making it.

We are now going to train our DQN model.

System requirements :-

  • TensorFlow : 2.4.1
  • Dedicated GPU

⚠️ Warning : this program has high computational requirements. Please do not train on a CPU.
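
Before starting, you can quickly confirm that TensorFlow actually sees your GPU. A small sanity check, not part of the training script :

    import tensorflow as tf

    # Lists the GPUs visible to TensorFlow; an empty list means training
    # would silently fall back to the CPU.
    print(tf.config.list_physical_devices('GPU'))

If it prints an empty list, fix your GPU / CUDA setup before starting the training below.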

    import tensorflow as tf
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten
    from keras.callbacks import TensorBoard
    from keras.optimizers import Adam
    from collections import deque
    import time
    import numpy as np
    from tqdm import tqdm
    import random
    import os
    from PIL import Image as Img
    import cv2

    REPLAY_MEMORY_SIZE = 50_000
    MODEL_NAME = "256x2"
    MIN_REPLAY_MEMORY_SIZE = 1_000

    MINIBATCH_SIZE = 64 # batch size for training data

    DISCOUNT = 0.99
    UPDATE_TARGET_EVERY = 5
    MIN_REWARD = -200
    EPISODES = 20_000

    epsilon = 1
    EPSILON_DECAY = 0.99975
    MIN_EPSILON = 0.001

    AGGREGATE_STATS_EVERY = 100 # see stats every 100 episodes

    SHOW_PREVIEW = False

💢 Blob class

Now we want to bring in our blob class and blob environment.

    class Blob:
        def __init__(self, size):
            self.size = size
            self.x = np.random.randint(0, size)
            self.y = np.random.randint(0, size)

        def __str__(self):
            return f"Blob ({self.x}, {self.y})"

        def __sub__(self, other):
            return (self.x-other.x, self.y-other.y)

        def __eq__(self, other):
            return self.x == other.x and self.y == other.y

We have 9 total movement options : 0, 1, 2, 3, 4, 5, 6, 7, 8

So now our agent no longer needs to rely on the boundaries to move straight up-down or left-right, because it has dedicated actions for those moves (and one for standing still). An equivalent lookup-table version is sketched right after the code below :

        def action(self, choice):
            if choice == 0:
                self.move(x=1, y=1)
            elif choice == 1:
                self.move(x=-1, y=-1)
            elif choice == 2:
                self.move(x=-1, y=1)
            elif choice == 3:
                self.move(x=1, y=-1)

            elif choice == 4:
                self.move(x=1, y=0)
            elif choice == 5:
                self.move(x=-1, y=0)

            elif choice == 6:
                self.move(x=0, y=1)
            elif choice == 7:
                self.move(x=0, y=-1)

            elif choice == 8:
                self.move(x=0, y=0)
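
For reference, the same choice-to-move mapping could also be written as a lookup table. This is only a sketch with a hypothetical module-level MOVES table, not something used in the rest of the code :

    # Hypothetical alternative: map each action index to an (x, y) delta
    # and look it up, instead of the if/elif chain above.
    MOVES = {0: (1, 1), 1: (-1, -1), 2: (-1, 1), 3: (1, -1),
             4: (1, 0), 5: (-1, 0), 6: (0, 1), 7: (0, -1), 8: (0, 0)}

    def action(self, choice):
        dx, dy = MOVES[choice]
        self.move(x=dx, y=dy)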

Updated method ‘move’ for TF 2.4.1 :-

        def move(self, x = None, y = None):

If no value for x, move randomly :

            if x == None:
                self.x += np.random.randint(-1, 2)
            else:
                self.x += x

If no value for y, move randomly :

            if y == None:
                self.y += np.random.randint(-1, 2)
            else:
                self.y += y

If we are out of bounds, fix :

            if self.x < 0:
                self.x = 0
            elif self.x > self.size-1:
                self.x = self.size-1
            if self.y < 0:
                self.y = 0
            elif self.y > self.size-1:
                self.y = self.size-1

    class BlobEnv:
        SIZE = 10
        RETURN_IMAGES = True
        MOVE_PENALTY = 1
        ENEMY_PENALTY = 300
        FOOD_REWARD = 25
        OBSERVATION_SPACE_VALUES = (SIZE, SIZE, 3)  # 4
        ACTION_SPACE_SIZE = 9

Player key in dict :

        PLAYER_N = 1

Food key in dict :

        FOOD_N = 2

Enemy key in dict :

        ENEMY_N = 3

The dict (colours) :

        d = {1: (255, 175, 0),
            2: (0, 255, 0),
            3: (0, 0, 255)}

        def reset(self):
            self.player = Blob(self.SIZE)
            self.food = Blob(self.SIZE)
            while self.food == self.player:
                self.food = Blob(self.SIZE)
            self.enemy = Blob(self.SIZE)
            while self.enemy == self.player or self.enemy == self.food:
                self.enemy = Blob(self.SIZE)

            self.episode_step = 0

            if self.RETURN_IMAGES:
                observation = np.array(self.get_image())
            else:
                observation = (self.player-self.food) + (self.player-self.enemy)
            return observation

        def step(self, action):
            self.episode_step += 1
            self.player.action(action)

💢 Movement options

To make the food and the enemy move as well, we could uncomment the following two lines (they are left commented out here so that only the player moves) :

            # self.enemy.move()
            # self.food.move()

            if self.RETURN_IMAGES:
                new_observation = np.array(self.get_image())
            else:
                new_observation = (self.player-self.food) + (self.player-self.enemy)

            if self.player == self.enemy:
                reward = -self.ENEMY_PENALTY
            elif self.player == self.food:
                reward = self.FOOD_REWARD
            else:
                reward = -self.MOVE_PENALTY

            done = False
            if reward == self.FOOD_REWARD or reward == -self.ENEMY_PENALTY or self.episode_step >= 200:
                done = True

            return new_observation, reward, done

        def render(self):
            img = self.get_image()

💢 Scaling the rendered image

            img = img.resize((300, 300))

Show it :

            cv2.imshow("image", np.array(img))
            cv2.waitKey(1)

💢 FOR CNN

        def get_image(self):

Starts an RGB array of our size :

            env = np.zeros((self.SIZE, self.SIZE, 3), dtype=np.uint8)

Sets the food location tile to green color :

            env[self.food.x][self.food.y] = self.d[self.FOOD_N]

Sets the enemy location to red :

            env[self.enemy.x][self.enemy.y] = self.d[self.ENEMY_N]

Sets the player tile to blue :

            env[self.player.x][self.player.y] = self.d[self.PLAYER_N]

We build the image as RGB, even though the colour definitions are in BGR order (cv2.imshow, used in render, interprets arrays as BGR) :

            img = Img.fromarray(env, 'RGB')

            return img
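
At this point the environment is complete. As a quick sanity check (a throwaway snippet with an illustrative test_env name, not part of the training script), you could reset it and take one random step to confirm the shapes and rewards :

    # Quick sanity check of BlobEnv.
    test_env = BlobEnv()
    obs = test_env.reset()
    print(obs.shape)    # (10, 10, 3) uint8 image when RETURN_IMAGES is True

    obs, reward, done = test_env.step(np.random.randint(0, test_env.ACTION_SPACE_SIZE))
    print(reward, done) # -1 (move), 25 (food) or -300 (enemy), and the done flag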

💢 Initializing the environment

    env = BlobEnv()

For stats :

    ep_rewards = [-200]

For more reproducible results :

    random.seed(1)
    np.random.seed(1)
    tf.random.set_seed(1)

Create models folder :

    if not os.path.isdir('models'):
        os.makedirs('models')

💢 TensorFlow 2.4.1

The following class is modified to work with TensorFlow 2.4.1.

Please do not change this class in any way, or the program may not work at all. If you face an issue with this class, please open a discussion.

    class ModifiedTensorBoard(TensorBoard):

        def __init__(self, **kwargs):
            super().__init__(**kwargs)
            self.step = 1
            self.writer = tf.summary.create_file_writer(self.log_dir)
            self._log_write_dir = self.log_dir

        def set_model(self, model):
            self.model = model

            self._train_dir = os.path.join(self._log_write_dir, 'train')
            self._train_step = self.model._train_counter

            self._val_dir = os.path.join(self._log_write_dir, 'validation')
            self._val_step = self.model._test_counter

            self._should_write_train_graph = False

        def on_epoch_end(self, epoch, logs=None):
            self.update_stats(**logs)

        def on_batch_end(self, batch, logs=None):
            pass

        def on_train_end(self, _):
            pass

        def update_stats(self, **stats):
            with self.writer.as_default():
                for key, value in stats.items():
                    tf.summary.scalar(key, value, step = self.step)
                    self.writer.flush()
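
With this class in place, the per-episode stats written by update_stats can be viewed in TensorBoard by pointing it at the logs folder, for example by running tensorboard --logdir logs from the project directory (assuming TensorBoard is installed and on your PATH).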

The following class was already explained in the previous parts. Please go through them if you face any issues.

    class DQNAgent:
        def __init__(self):
            self.model = self.create_model()

            self.target_model = self.create_model()
            self.target_model.set_weights(self.model.get_weights())

            self.replay_memory = deque(maxlen = REPLAY_MEMORY_SIZE)

            self.tensorboard = ModifiedTensorBoard(log_dir = f"logs/{MODEL_NAME}-{int(time.time())}")

            self.target_update_counter = 0

        def create_model(self):
            model = Sequential()
            model.add(Conv2D(256, (3, 3), input_shape = env.OBSERVATION_SPACE_VALUES))
            model.add(Activation("relu"))
            model.add(MaxPooling2D(2, 2))
            model.add(Dropout(0.2))

            model.add(Conv2D(256, (3, 3)))
            model.add(Activation("relu"))
            model.add(MaxPooling2D(2, 2))
            model.add(Dropout(0.2))

            model.add(Flatten())
            model.add(Dense(64))

            model.add(Dense(env.ACTION_SPACE_SIZE, activation = "linear"))
            model.compile(loss="mse", optimizer = Adam(lr=0.001), metrics=['accuracy'])

            return model

        def update_replay_memory(self, transition):
            self.replay_memory.append(transition)

        def get_qs(self, state):
            return self.model.predict(np.array(state).reshape(-1, *state.shape) / 255)[0]

        def train(self, terminal_state, step):
            if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
                return

            minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

💢 Teaching CNNs

By dividing by 255 in the following statement, we scale the image pixel values to the range 0 to 1, which helps the convolutional neural network train more stably :

            current_states = np.array([transition[0] for transition in minibatch]) / 255

            current_qs_list = self.model.predict(current_states)

The states observed after the actions were taken (used to get the future Q-values from the target model) :

            new_current_states = np.array([transition[3] for transition in minibatch]) / 255
            future_qs_list = self.target_model.predict(new_current_states)

The following list will hold the images (states) from the game :

            X = []

The following list will hold the target Q-values (the labels the model is trained towards) :

            y = []

💢 Calculate learned value
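
For reference, the target we are about to compute is the standard one-step Q-learning target, written here with the variable names used in the code :

    # For each sampled transition (current_state, action, reward, new_current_state, done):
    #
    #   new_q = reward + DISCOUNT * max(future_qs_list[index])   if not done
    #   new_q = reward                                           if done
    #
    # future_qs_list comes from the target model, and DISCOUNT plays the
    # role of gamma in the usual Q-learning update.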

With the following loop, we calculate the last bit (the learned value) of the Q-value formula for every transition in the minibatch :

            for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch):
                if not done:
                    max_future_q = np.max(future_qs_list[index])
                    new_q = reward + DISCOUNT * max_future_q
                else:
                    new_q = reward

                current_qs = current_qs_list[index]
                current_qs[action] = new_q

                X.append(current_state)
                y.append(current_qs)

            self.model.fit(np.array(X) / 255, np.array(y), batch_size = MINIBATCH_SIZE, verbose = 0, shuffle = False,

We fit on every training call, but attach the TensorBoard callback only on the terminal state, so training stats are logged once per episode :

            callbacks = [self.tensorboard] if terminal_state else None)

Finally, we check whether it is time to update the target_model yet :

            if terminal_state:
                self.target_update_counter += 1

            if self.target_update_counter > UPDATE_TARGET_EVERY:
                self.target_model.set_weights(self.model.get_weights())
                self.target_update_counter = 0

💢 Agent creation

Create agent :

    agent = DQNAgent()

    # Now we are ready to iterate over everything.

    for episode in tqdm(range(1, EPISODES + 1), ascii = True, unit = "episode"):
        agent.tensorboard.step = episode

        episode_reward = 0
        step = 1
        current_state = env.reset()

        done = False

        while not done:
            if np.random.random() > epsilon:
                action = np.argmax(agent.get_qs(current_state))
            else:
                action = np.random.randint(0, env.ACTION_SPACE_SIZE)

            new_state, reward, done = env.step(action)

            episode_reward += reward

            if SHOW_PREVIEW and not episode % AGGREGATE_STATS_EVERY:
                env.render()

            agent.update_replay_memory((current_state, action, reward, new_state, done))
            agent.train(done, step)

            current_state = new_state
            step += 1

💢 Episode rewards

Now we are going to append the episode reward and then grab various aggregate stats, which we log to TensorBoard. If you would also like a matplotlib chart of these values, a small optional sketch is included after the full code at the end of this post.

Append episode reward to a list and log stats (every given number of episodes) :

        ep_rewards.append(episode_reward)
        if not episode % AGGREGATE_STATS_EVERY or episode == 1:
            average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:])/len(ep_rewards[-AGGREGATE_STATS_EVERY:])
            min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
            max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
            agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon)

Save the model, but only when the minimum reward is greater than or equal to a set value :

            if min_reward >= MIN_REWARD:
                agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')

Decay epsilon :

        if epsilon > MIN_EPSILON:
            epsilon *= EPSILON_DECAY
            epsilon = max(MIN_EPSILON, epsilon)
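
As a rough side calculation (not part of the script) : with EPSILON_DECAY = 0.99975, epsilon needs about log(MIN_EPSILON) / log(EPSILON_DECAY) ≈ 27,600 episodes to decay from 1 down to MIN_EPSILON = 0.001, so over the configured 20,000 episodes it keeps shrinking but never quite reaches the floor :

    import math

    # Approximate number of episodes for epsilon to decay from 1 to MIN_EPSILON.
    print(math.log(MIN_EPSILON) / math.log(EPSILON_DECAY))   # about 27,600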

💢 Entire code

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten
from keras.callbacks import TensorBoard
from keras.optimizers import Adam
from collections import deque
import time
import numpy as np
from tqdm import tqdm
import random
import os
from PIL import Image as Img
import cv2

REPLAY_MEMORY_SIZE = 50_000
MODEL_NAME = "256x2"
MIN_REPLAY_MEMORY_SIZE = 1_000
MINIBATCH_SIZE = 64     # batch size for training data
DISCOUNT = 0.99
UPDATE_TARGET_EVERY = 5
MIN_REWARD = -200
EPISODES = 20_000

epsilon = 1
EPSILON_DECAY = 0.99975
MIN_EPSILON = 0.001

AGGREGATE_STATS_EVERY = 100     # number of episodes to see stats = 100
SHOW_PREVIEW = False

class Blob:
    def __init__(self, size):
        self.size = size
        self.x = np.random.randint(0, size)
        self.y = np.random.randint(0, size)

    def __str__(self):
        return f"Blob ({self.x}, {self.y})"

    def __sub__(self, other):
        return (self.x-other.x, self.y-other.y)

    def __eq__(self, other):
        return self.x == other.x and self.y == other.y

    def action(self, choice):
        if choice == 0:
            self.move(x=1, y=1)
        elif choice == 1:
            self.move(x=-1, y=-1)
        elif choice == 2:
            self.move(x=-1, y=1)
        elif choice == 3:
            self.move(x=1, y=-1)

        elif choice == 4:
            self.move(x=1, y=0)
        elif choice == 5:
            self.move(x=-1, y=0)

        elif choice == 6:
            self.move(x=0, y=1)
        elif choice == 7:
            self.move(x=0, y=-1)

        elif choice == 8:
            self.move(x=0, y=0)

    def move(self, x = None, y = None):
        if x == None:
            self.x += np.random.randint(-1, 2)
        else:
            self.x += x

        if y == None:
            self.y += np.random.randint(-1, 2)
        else:
            self.y += y

        if self.x < 0:
            self.x = 0
        elif self.x > self.size-1:
            self.x = self.size-1
        if self.y < 0:
            self.y = 0
        elif self.y > self.size-1:
            self.y = self.size-1


class BlobEnv:
    SIZE = 10
    RETURN_IMAGES = True
    MOVE_PENALTY = 1
    ENEMY_PENALTY = 300
    FOOD_REWARD = 25
    OBSERVATION_SPACE_VALUES = (SIZE, SIZE, 3)  # 4
    ACTION_SPACE_SIZE = 9
    PLAYER_N = 1
    FOOD_N = 2
    ENEMY_N = 3

    d = {1: (255, 175, 0),
         2: (0, 255, 0),
         3: (0, 0, 255)}

    def reset(self):
        self.player = Blob(self.SIZE)
        self.food = Blob(self.SIZE)
        while self.food == self.player:
            self.food = Blob(self.SIZE)
        self.enemy = Blob(self.SIZE)
        while self.enemy == self.player or self.enemy == self.food:
            self.enemy = Blob(self.SIZE)

        self.episode_step = 0

        if self.RETURN_IMAGES:
            observation = np.array(self.get_image())
        else:
            observation = (self.player-self.food) + (self.player-self.enemy)
        return observation

    def step(self, action):
        self.episode_step += 1
        self.player.action(action)

        if self.RETURN_IMAGES:
            new_observation = np.array(self.get_image())
        else:
            new_observation = (self.player-self.food) + (self.player-self.enemy)

        if self.player == self.enemy:
            reward = -self.ENEMY_PENALTY
        elif self.player == self.food:
            reward = self.FOOD_REWARD
        else:
            reward = -self.MOVE_PENALTY

        done = False
        if reward == self.FOOD_REWARD or reward == -self.ENEMY_PENALTY or self.episode_step >= 200:
            done = True

        return new_observation, reward, done

    def render(self):
        img = self.get_image()
        img = img.resize((300, 300))
        cv2.imshow("image", np.array(img))
        cv2.waitKey(1)

    def get_image(self):
        env = np.zeros((self.SIZE, self.SIZE, 3), dtype=np.uint8)
        env[self.food.x][self.food.y] = self.d[self.FOOD_N]
        env[self.enemy.x][self.enemy.y] = self.d[self.ENEMY_N]
        env[self.player.x][self.player.y] = self.d[self.PLAYER_N]
        img = Img.fromarray(env, 'RGB')
        return img


env = BlobEnv()

ep_rewards = [-200]

random.seed(1)
np.random.seed(1)
tf.random.set_seed(1)

# Create models folder
if not os.path.isdir('models'):
    os.makedirs('models')

class ModifiedTensorBoard(TensorBoard):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.step = 1
        self.writer = tf.summary.create_file_writer(self.log_dir)
        self._log_write_dir = self.log_dir

    def set_model(self, model):
        self.model = model

        self._train_dir = os.path.join(self._log_write_dir, 'train')
        self._train_step = self.model._train_counter

        self._val_dir = os.path.join(self._log_write_dir, 'validation')
        self._val_step = self.model._test_counter

        self._should_write_train_graph = False

    def on_epoch_end(self, epoch, logs=None):
        self.update_stats(**logs)

    def on_batch_end(self, batch, logs=None):
        pass

    def on_train_end(self, _):
        pass

    def update_stats(self, **stats):
        with self.writer.as_default():
            for key, value in stats.items():
                tf.summary.scalar(key, value, step = self.step)
                self.writer.flush()

class DQNAgent:
    def __init__(self):
        self.model = self.create_model()

        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())

        self.replay_memory = deque(maxlen = REPLAY_MEMORY_SIZE)

        self.tensorboard = ModifiedTensorBoard(log_dir = f"logs/{MODEL_NAME}-{int(time.time())}")
        self.target_update_counter = 0

    def create_model(self):
        model = Sequential()
        model.add(Conv2D(256, (3, 3), input_shape = env.OBSERVATION_SPACE_VALUES))
        model.add(Activation("relu"))
        model.add(MaxPooling2D(2, 2))
        model.add(Dropout(0.2))

        model.add(Conv2D(256, (3, 3)))
        model.add(Activation("relu"))
        model.add(MaxPooling2D(2, 2))
        model.add(Dropout(0.2))

        model.add(Flatten())
        model.add(Dense(64))

        model.add(Dense(env.ACTION_SPACE_SIZE, activation = "linear"))
        model.compile(loss="mse", optimizer = Adam(lr=0.001), metrics=['accuracy'])

        return model

    def update_replay_memory(self, transition):
        self.replay_memory.append(transition)

    def get_qs(self, state):
        return self.model.predict(np.array(state).reshape(-1, *state.shape) / 255)[0]

    def train(self, terminal_state, step):
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        current_states = np.array([transition[0] for transition in minibatch]) / 255
        current_qs_list = self.model.predict(current_states)

        new_current_states = np.array([transition[3] for transition in minibatch]) / 255
        future_qs_list = self.target_model.predict(new_current_states)

        X = []
        y = []

        for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch):
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward

            current_qs = current_qs_list[index]
            current_qs[action] = new_q

            X.append(current_state)
            y.append(current_qs)

        self.model.fit(np.array(X) / 255, np.array(y), batch_size = MINIBATCH_SIZE, verbose = 0, shuffle = False,
        callbacks = [self.tensorboard] if terminal_state else None)

        if terminal_state:
            self.target_update_counter += 1

        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0

agent = DQNAgent()

for episode in tqdm(range(1, EPISODES + 1), ascii = True, unit = "episode"):
    agent.tensorboard.step = episode

    episode_reward = 0
    step = 1
    current_state = env.reset()

    done = False

    while not done:
        if np.random.random() > epsilon:
            action = np.argmax(agent.get_qs(current_state))
        else:
            action = np.random.randint(0, env.ACTION_SPACE_SIZE)

        new_state, reward, done = env.step(action)

        episode_reward += reward

        if SHOW_PREVIEW and not episode % AGGREGATE_STATS_EVERY:
            env.render()

        agent.update_replay_memory((current_state, action, reward, new_state, done))
        agent.train(done, step)

        current_state = new_state
        step += 1

    ep_rewards.append(episode_reward)
    if not episode % AGGREGATE_STATS_EVERY or episode == 1:
        average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:])/len(ep_rewards[-AGGREGATE_STATS_EVERY:])
        min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
        max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
        agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon)

        if min_reward >= MIN_REWARD:
            agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')

    if epsilon > MIN_EPSILON:
        epsilon *= EPSILON_DECAY
        epsilon = max(MIN_EPSILON, epsilon)
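
That is the complete program. If you would also like the matplotlib chart of the rewards mentioned earlier, here is a minimal optional sketch you could append after the training loop, assuming matplotlib is installed (the moving-average window is an arbitrary choice) :

    import matplotlib.pyplot as plt

    # Plot a simple moving average of the per-episode rewards collected
    # in ep_rewards during training.
    window = AGGREGATE_STATS_EVERY
    moving_avg = np.convolve(ep_rewards, np.ones(window) / window, mode='valid')

    plt.plot(range(len(moving_avg)), moving_avg)
    plt.xlabel("Episode")
    plt.ylabel(f"Reward ({window} episode moving average)")
    plt.show()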