# top of page

# R CODES

## INITIAL STEPS

# Load the Last.fm listening data; later steps read its `user`, `artist`
# and `country` columns.
fm <- read.csv("lastfm.csv")
head(fm)

# Count distinct values via nlevels(factor(...)) rather than
# length(levels(...)): since R 4.0.0 read.csv() keeps text columns as
# character vectors, for which levels() returns NULL and the count would
# silently be 0. factor() works for both character and factor columns.
nlevels(factor(fm$artist))
nlevels(factor(fm$country))

# Original author's note on the result: 1004 artists and 159 countries


## PREPARING THE DATA FOR ANALYSIS
library(arules)

# Open the help page for split(), which does the grouping below
help("split")

# Group the artist column by user: one list element per user
playlist <- split(fm$artist, fm$user)
class(playlist) # list

playlist[seq_len(3)]

# Drop repeated artists within each user's entry
# (lapply applies unique() to every element of the list)
playlist <- lapply(X = playlist, FUN = unique)

# Coerce the list to an arules "transactions" object
playlist <- as(object = playlist, Class = "transactions")
dim(playlist)

# Show the first three transactions
inspect(playlist[seq_len(3)])


# itemFrequency - support (item frequency) of items; the items of `playlist`
# are artists
itemFrequency(playlist[, 1]) # support of the first item
itemFrequency(playlist[, seq_len(5)]) # support of the first five items
summary(itemFrequency(playlist)) # summary of support over all items

# Bar plots of item support
itemFrequencyPlot(playlist, col = "orange", topN = 10) # top 10 artists
itemFrequencyPlot(playlist, col = "blue", support = 0.1) # support of at least 0.1


## MODEL

# Mine association rules with minimum support 0.01 and minimum confidence 0.5
model <- apriori(playlist, parameter = list(support = 0.01, confidence = 0.5))
model

# Summary of the model
summary(model)

# Inspecting the rules
inspect(model)

# head() returns at most the rules that exist, so this stays valid when
# fewer than 10 rules were mined (unlike indexing with [1:10]).
inspect(head(model, n = 10))

# Sort by lift once and reuse it for the top-10 and the full listing
model_by_lift <- sort(model, by = "lift")
inspect(head(model_by_lift, n = 10))
inspect(model_by_lift)

# Rules whose lift exceeds 4
inspect(subset(model, subset = lift > 4))


## Prediction and Visualization

# Rules restricted to "t.i." on the left-hand side, with any item allowed on
# the right-hand side. minlen = 2 requires at least two items per rule, so
# rules with an empty left-hand side are excluded.
rules <- apriori(
  data = playlist,
  parameter = list(support = 0.005, confidence = 0.3, minlen = 2),
  appearance = list(default = "rhs", lhs = "t.i."),
  control = list(verbose = FALSE) # FALSE, not F: F is a reassignable variable
)

# Highest-confidence rules first
rules <- sort(rules, decreasing = TRUE, by = "confidence")
inspect(rules)


# install.packages("arulesViz")
library(arulesViz)

# Interactive graph of the rules. The `interactive` argument of arulesViz's
# plot() is deprecated in favour of `engine`; shading = NA suppresses the
# colour shading.
plot(rules, method = "graph", engine = "interactive", shading = NA)

# Rules restricted to "t.i." on the right-hand side, with any item allowed on
# the left-hand side.
rules2 <- apriori(
  data = playlist,
  parameter = list(support = 0.001, confidence = 0.08),
  appearance = list(default = "lhs", rhs = "t.i."),
  control = list(verbose = FALSE) # FALSE, not F: F is a reassignable variable
)
rules2 <- sort(rules2, decreasing = TRUE, by = "confidence")

# head() returns at most the rules that exist, so this stays valid when
# fewer than 5 rules were found (unlike indexing with [1:5]).
inspect(head(rules2, n = 5))

# bottom of page