
# R CODES
## INITIAL STEPS
# Read the data set from lastfm.csv and preview its first rows.
fm <- read.csv("lastfm.csv")
head(fm)
# Count the distinct artists and countries. Since R 4.0 read.csv() keeps
# strings as character vectors, for which levels() returns NULL (a count of
# 0); converting to a factor first works for character and factor columns.
nlevels(factor(fm$artist))
nlevels(factor(fm$country))
# 1004 artists and 159 countries
## PREPARING THE DATA FOR ANALYSIS
library(arules)
# Group the artists by user: one vector of artists per user
# (run ?split interactively for details on the grouping).
playlist <- split(x = fm$artist, f = fm$user)
class(playlist) # list
playlist[1:3]
# Remove duplicates so each artist appears at most once per user.
playlist <- lapply(playlist, unique) # lapply applies unique() to every element
# Convert the list into an arules "transactions" object (one per user).
playlist <- as(playlist, "transactions")
dim(playlist)
inspect(playlist[1:3]) # inspect the first three transactions
# itemFrequency() gives the support (relative frequency) of each item;
# the items here are artists.
itemFrequency(playlist[, 1]) # support of the first artist
itemFrequency(playlist[, 1:5]) # support of the first five artists
summary(itemFrequency(playlist)) # summary of the support values
# Support of the ten most frequent artists.
itemFrequencyPlot(playlist, topN = 10, col = "orange")
# Artists with a support of at least 0.1.
itemFrequencyPlot(playlist, support = 0.1, col = "blue")
## MODEL
# Mine association rules with minimum support 0.01 and minimum confidence 0.5.
model <- apriori(playlist, parameter = list(support = 0.01, confidence = 0.5))
model
# Summary of the model
summary(model)
# Inspecting the rules
inspect(model)
# head() instead of [1:10]: indexing past the end of a rule set with fewer
# than ten rules raises a subscript error, head() simply returns what exists.
inspect(head(model, n = 10))
# Sort by lift once and reuse the result for both listings.
model_by_lift <- sort(model, by = "lift")
inspect(head(model_by_lift, n = 10))
inspect(model_by_lift)
# Only the rules with lift above 4.
inspect(subset(model, subset = lift > 4))
## Prediction and Visualization
# Rules with the artist "t.i." on the left-hand side: the appearance list
# restricts "t.i." to the lhs and every other item to the rhs, and minlen = 2
# excludes rules with an empty lhs.
rules <- apriori(
  data = playlist,
  parameter = list(support = 0.005, confidence = 0.3, minlen = 2),
  appearance = list(default = "rhs", lhs = "t.i."),
  control = list(verbose = FALSE) # FALSE, not F: F can be reassigned
)
rules <- sort(rules, decreasing = TRUE, by = "confidence")
inspect(rules)
# install.packages("arulesViz")
library(arulesViz)
# engine = "interactive" replaces the deprecated interactive = TRUE argument.
plot(rules, method = "graph", engine = "interactive", shading = NA)
# Rules with "t.i." on the right-hand side: every other item may appear only
# on the lhs.
rules2 <- apriori(
  data = playlist,
  parameter = list(support = 0.001, confidence = 0.08),
  appearance = list(default = "lhs", rhs = "t.i."),
  control = list(verbose = FALSE)
)
rules2 <- sort(rules2, decreasing = TRUE, by = "confidence")
# head() instead of [1:5] so fewer than five rules does not raise an error.
inspect(head(rules2, n = 5))