We have provided a detailed example of Thompson Sampling using R. Below, we show how to run the multi-armed bandit algorithm in Python.
import math

import numpy as np
import pandas as pd
from scipy.stats import beta

np.random.seed(1234)


def bandits(data, beta_values=1000):
    """Estimate Thompson-sampling traffic weights for a set of arms.

    Each row of ``data`` is one arm. For every arm, ``beta_values`` values
    are drawn from Beta(clicks + 1, impressions - clicks + 1). An arm's
    weight is the fraction of draws in which its sample was the largest
    among all arms.

    Args:
        data: DataFrame with ``clicks`` and ``impressions`` columns, one
            row per arm. It is read only; no columns are added to it.
        beta_values: Number of Monte Carlo draws taken per arm.

    Returns:
        A new DataFrame with the same index as ``data`` and a single
        float column ``weights``. Arms that never win get 0.

    Raises:
        ValueError: If ``beta_values`` is smaller than 1, or if any arm
            has negative clicks or more clicks than impressions.
    """
    if beta_values < 1:
        raise ValueError("beta_values must be a positive integer")

    clicks = data['clicks'].to_numpy()
    impressions = data['impressions'].to_numpy()
    if ((clicks < 0) | (clicks > impressions)).any():
        raise ValueError("each arm needs 0 <= clicks <= impressions")

    n_arms = len(data)
    if n_arms == 0:
        # Nothing to sample; return an empty result with the same index.
        return pd.DataFrame({'weights': np.zeros(0)}, index=data.index)

    # Sample one arm at a time, in row order: one row of `draws` per arm,
    # one column per Monte Carlo draw.
    draws = np.vstack([
        beta(c + 1, n - c + 1).rvs(beta_values)
        for c, n in zip(clicks, impressions)
    ])

    # Count wins by row position rather than by index label, so arms that
    # share an index label are still scored separately.
    winners = draws.argmax(axis=0)
    wins = np.bincount(winners, minlength=n_arms)

    return pd.DataFrame({'weights': wins / beta_values}, index=data.index)


### example
z = {'clicks': [15, 17, 19], 'impressions': [1000, 1000, 1000]}
z = pd.DataFrame(z)

# and the weights
print(bandits(z))
Output:
weights
0 0.159
1 0.288
2 0.553