# Reinforcement Learning in Python and R - Machine Learning and Data Science cheat sheets.

The Python programming language and its libraries, together with the R language, form a powerful toolset for solving Reinforcement Learning tasks.

Reinforcement learning (RL) is a branch of machine learning (ML) concerned with how intelligent agents should take actions in an environment in order to maximize the notion of cumulative reward. Reinforcement learning is one of the three basic machine learning paradigms, alongside supervised learning and unsupervised learning.

Reinforcement learning is distinguished from supervised learning in not needing labelled input/output pairs to be presented, and in not needing sub-optimal actions to be explicitly corrected. Instead, the focus is on finding a balance between exploration (of uncharted territory) and exploitation (of current knowledge).

## Upper Confidence Bound (UCB) model

### Upper Confidence Bound in Python

#Importing the libraries
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#Importing the dataset
dataset = pd.read_csv('my_dataset.csv', header=None)  # header=None: the first line is data, not column names

#Implementing UCB
N = 10000  # rounds played; row n of the dataset is read in round n
d = 10  # ads to choose from; column i of the dataset belongs to ad i
rewards = dataset.values  # looked up once instead of on every round
ads_selected = []  # ad chosen in each round, in order
numbers_of_selections = [0] * d  # how many times each ad has been chosen so far
sums_of_rewards = [0] * d  # reward accumulated by each ad so far
total_reward = 0
for n in range(N):
    ad = 0
    max_upper_bound = 0
    for i in range(d):
        if numbers_of_selections[i] > 0:
            average_reward = sums_of_rewards[i] / numbers_of_selections[i]
            # n is zero-based, so n + 1 is the number of the current round.
            delta_i = math.sqrt(3 / 2 * math.log(n + 1) / numbers_of_selections[i])
            upper_bound = average_reward + delta_i
        else:
            # An ad that has never been chosen gets an infinite bound, so it
            # wins the comparison below and is tried before any ad is repeated.
            upper_bound = math.inf
        # Strict '>' keeps the lowest-indexed ad when several bounds tie.
        if upper_bound > max_upper_bound:
            max_upper_bound = upper_bound
            ad = i
    ads_selected.append(ad)
    numbers_of_selections[ad] += 1
    reward = rewards[n, ad]
    sums_of_rewards[ad] += reward
    total_reward += reward

#Visualising the results - Histogram
plt.hist(ads_selected)
plt.title('Histogram of ads selections')
plt.xlabel('Ads')
plt.ylabel('Number of times each ad was selected')
plt.show()

### Upper Confidence Bound in R

#Importing the dataset
#NOTE(review): read.csv() treats the first line as column names by default,
#while the Python snippet reads the same file name with header = None -
#confirm which of the two matches my_dataset.csv.
dataset = read.csv('my_dataset.csv')

#Implementing Upper Confidence Bound
N = 10000  # rounds played; row n of the dataset is read in round n
d = 10  # ads to choose from; column i of the dataset belongs to ad i
ads_selected = integer(N)  # preallocated: growing it with append() recopied the vector every round
numbers_of_selections = integer(d)  # how many times each ad has been chosen so far
sums_of_rewards = integer(d)  # reward accumulated by each ad so far
total_reward = 0
for (n in 1:N) {
  ad = 0
  max_upper_bound = 0
  for (i in 1:d) {
    if (numbers_of_selections[i] > 0) {
      average_reward = sums_of_rewards[i] / numbers_of_selections[i]
      delta_i = sqrt(3/2 * log(n) / numbers_of_selections[i])
      upper_bound = average_reward + delta_i
    } else {
      # An ad that has never been chosen gets an infinite bound, so it
      # wins the comparison below and is tried before any ad is repeated.
      upper_bound = Inf
    }
    # Strict '>' keeps the lowest-indexed ad when several bounds tie.
    if (upper_bound > max_upper_bound) {
      max_upper_bound = upper_bound
      ad = i
    }
  }
  ads_selected[n] = ad
  numbers_of_selections[ad] = numbers_of_selections[ad] + 1
  reward = dataset[n, ad]
  sums_of_rewards[ad] = sums_of_rewards[ad] + reward
  total_reward = total_reward + reward
}

# Visualising the results
hist(ads_selected, col = 'blue', main = 'Histogram of ads selections', xlab = 'Ads', ylab = 'Number of times each ad was selected')

## Thompson Sampling model

### Thompson Sampling in Python

#Importing the libraries
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#Importing the dataset
dataset = pd.read_csv('my_dataset.csv', header=None)  # header=None: the first line is data, not column names

#Implementing Thompson Sampling
N = 10000  # rounds played; row n of the dataset is read in round n
d = 10  # ads to choose from; column i of the dataset belongs to ad i
rewards = dataset.values  # looked up once instead of on every round
ads_selected = []  # ad chosen in each round, in order
numbers_of_rewards_1 = [0] * d  # per ad: rounds in which its reward was 1
numbers_of_rewards_0 = [0] * d  # per ad: rounds in which its reward was not 1
total_reward = 0
for n in range(N):
    ad = 0
    max_random = 0
    for i in range(d):
        # One draw per ad from Beta(rewards_1 + 1, rewards_0 + 1); the ad
        # with the largest draw is the one chosen this round.
        random_beta = random.betavariate(numbers_of_rewards_1[i] + 1, numbers_of_rewards_0[i] + 1)
        if random_beta > max_random:
            max_random = random_beta
            ad = i
    ads_selected.append(ad)
    reward = rewards[n, ad]
    if reward == 1:
        numbers_of_rewards_1[ad] += 1
    else:
        numbers_of_rewards_0[ad] += 1
    total_reward += reward

#Visualising the results - Histogram
plt.hist(ads_selected)
plt.title('Histogram of ads selections')
plt.xlabel('Ads')
plt.ylabel('Number of times each ad was selected')
plt.show()

### Thompson Sampling in R

#Importing the dataset
#NOTE(review): read.csv() treats the first line as column names by default,
#while the Python snippet reads the same file name with header = None -
#confirm which of the two matches my_dataset.csv.
dataset = read.csv('my_dataset.csv')

#Implementing Thompson Sampling
N = 10000  # rounds played; row n of the dataset is read in round n
d = 10  # ads to choose from; column i of the dataset belongs to ad i
ads_selected = integer(N)  # preallocated: growing it with append() recopied the vector every round
numbers_of_rewards_1 = integer(d)  # per ad: rounds in which its reward was 1
numbers_of_rewards_0 = integer(d)  # per ad: rounds in which its reward was not 1
total_reward = 0
for (n in 1:N) {
  ad = 0
  max_random = 0
  for (i in 1:d) {
    # One draw per ad from Beta(rewards_1 + 1, rewards_0 + 1); the ad
    # with the largest draw is the one chosen this round.
    random_beta = rbeta(n = 1,
                        shape1 = numbers_of_rewards_1[i] + 1,
                        shape2 = numbers_of_rewards_0[i] + 1)
    if (random_beta > max_random) {
      max_random = random_beta
      ad = i
    }
  }
  ads_selected[n] = ad
  reward = dataset[n, ad]
  if (reward == 1) {
    numbers_of_rewards_1[ad] = numbers_of_rewards_1[ad] + 1
  } else {
    numbers_of_rewards_0[ad] = numbers_of_rewards_0[ad] + 1
  }
  total_reward = total_reward + reward
}

# Visualising the results
hist(ads_selected, col = 'blue', main = 'Histogram of ads selections', xlab = 'Ads', ylab = 'Number of times each ad was selected')