This is my Capstone Project for the Data Science Specialization by Johns Hopkins University on Coursera.
The goal of the Capstone Project was to implement a useful model to predict the next word given one or more words, and to develop a web app that uses it.
In the last post you can find how I created the tables used for this app.
Below is the code for the app; in addition to R, there is a bit of HTML and CSS. You can see and use the app here.
------------------------------------------------------------------------
library(shiny)
library(dplyr)
# Restore the objects saved in the three n-gram .RData files into the current
# environment (presumably the WordBigram / WordTrigram / WordQuadrigram tables
# that the server queries -- the file contents are not visible here).
invisible(lapply(
  c("WordBigram.RData", "WordTrigram.RData", "WordQuadrigram.RData"),
  load,
  envir = environment()
))

# Default lookup status: "N" is the code the server uses for "no prediction found".
Found <- "N"
# Separator string (comma + newline); "virgola" is Italian for "comma".
virgola <- ",\n"
# ui
# User interface: a dark, full-page background with a grey sidebar holding two
# tabs (the text input and an "About" page) and a main panel that shows the
# predicted word ("word") and the alternative words ("others").
ui <- fluidPage(
  # Page-wide CSS: gold text for the two outputs and the background image.
  tags$head(
    tags$style(
      HTML('
#word {
color: gold;
font-size: 300%;
}
#others {
color: gold;
font-size: 200%;
}
tabPanel{
font-family: "Calibri";
color: gold;
}
body, label, input, button, select {
font-family: "Calibri";
background: #000000 url("http://massyfigini.github.io/assets/css/images/DocStrange.jpg") bottom left;
background-position: center center;
background-repeat: no-repeat;
background-attachment: fixed;
background-size: cover;
}')
    )
  ),

  # Centered page heading; windowTitle sets the browser tab title.
  titlePanel(
    title = HTML("<font size=10 color=white><b><center>The Next Word App</center></b></font><br/>"),
    windowTitle = "The Next Word App"
  ),

  sidebarLayout(
    sidebarPanel(
      # Light grey background for the sidebar "well".
      tags$style(".well {background-color:#d3d3d3;}"),
      tabsetPanel(
        # First tab: text box for the words plus the button that triggers the prediction.
        tabPanel(
          title = HTML("<font color=black>Input</font>"),
          textInput(
            inputId = "Word",
            label = NULL,
            placeholder = "Write the words here..."
          ),
          actionButton(inputId = "go", label = "Go!", icon = icon("plane")),
          HTML("<br/><br/><br/><a href=http://www.massimilianofigini.com>©massyfigini</a>")
        ),
        # Second tab: static instructions and background information.
        tabPanel(
          title = HTML("<font color=black>About the app</font>"),
          HTML("<br/><b>Instruction</b><br/>
This app predict the next word given one or more words.<br/>
In the input tab, you have to write one or more words and press the 'Go!' button, and the app will predict the next word.<br/>
<br/><b>Data</b><br/>
The English 'Corpora' data are the starting point for the algorithm, you can find more information
<a href=https://web-beta.archive.org/web/20160930083655/http://www.corpora.heliohost.org/aboutcorpus.html>here</a>.
<br/><br/><b>Algorithm</b><br/>
The algorithm is created starting from the 'Corpora' data. The data are first divided in sentences, than I have
made the bigram, trigram and 4-gram data.
Every words you insert, the algorithm choose the most probabilities next word. You can find more information
<a href=http://rpubs.com/massyfigini/NextWordApp>here</a>.
<br/><br/><a href=http://www.massimilianofigini.com>©massyfigini</a>")
        )
      ),
      width = 4
    ),

    # Results area: best prediction on top, alternatives below.
    mainPanel(
      HTML("<font size=5 color=white>Top probability next word</font><br/>"),
      htmlOutput(outputId = "word"),
      HTML("<br/><br/>"),
      HTML("<font size=5 color=white>Other possibly words</font><br/>"),
      htmlOutput(outputId = "others")
    )
  )
)
# server
# Shiny server function.
#
# Each time the "Go!" button (input$go) is pressed, the text in input$Word is
# lower-cased and split into words, and the next word is predicted with a
# back-off over the n-gram tables loaded at start-up:
#   * 3 or more words: WordQuadrigram (last three words), then WordTrigram
#     (last two words), then WordBigram (last word);
#   * 2 words: WordTrigram, then WordBigram;
#   * 1 word: WordBigram only.
# The first table that contains the key wins. Its `First` column is rendered in
# output$word; output$others shows further candidates taken from the bigram row.
#
# Arguments:
#   input   Shiny input object; reads `go` and `Word`.
#   output  Shiny output object; writes `word` and `others`.
server <- function(input, output) {

  # Return the First/Second/Third columns of the row(s) of `ngram_table` whose
  # `Start` equals `start_key`; the result has zero rows when there is no match.
  lookup_candidates <- function(ngram_table, start_key) {
    ngram_table %>%
      filter(Start == start_key) %>%
      select(First, Second, Third)
  }

  # TRUE only for a single, non-missing word that is different from `best`.
  # Written so that it can never yield NA (an NA condition makes `if` fail).
  is_new_word <- function(word, best) {
    length(word) == 1 && !is.na(word) && !isTRUE(word == best)
  }

  observeEvent(input$go, {
    # Normalise the input: lower-case, drop leading/trailing blanks and split on
    # any run of whitespace, so repeated or leading spaces do not create empty
    # "words" that would break the n-gram keys.
    words <- unlist(strsplit(trimws(tolower(input$Word)), "[[:space:]]+"))
    n_words <- length(words)

    # Key made of the last `k` words joined by single spaces.
    last_words <- function(k) paste(tail(words, k), collapse = " ")

    # found: "Q" / "T" / "B" = table that produced the prediction, "N" = none.
    # top: candidates from that table. bigram: candidates for the last word.
    found <- "N"
    top <- NULL
    bigram <- NULL

    if (n_words > 0) {
      # The bigram row is needed on every path: either as the prediction itself
      # or as the source of the alternative words.
      bigram <- lookup_candidates(WordBigram, last_words(1))

      if (n_words > 2) {
        quadrigram <- lookup_candidates(WordQuadrigram, last_words(3))
        if (nrow(quadrigram) > 0) {
          found <- "Q"
          top <- quadrigram
        }
      }
      if (found == "N" && n_words > 1) {
        trigram <- lookup_candidates(WordTrigram, last_words(2))
        if (nrow(trigram) > 0) {
          found <- "T"
          top <- trigram
        }
      }
      if (found == "N" && nrow(bigram) > 0) {
        found <- "B"
        top <- bigram
      }
    }

    # Best prediction: first candidate column of the winning table.
    output$word <- renderPrint({
      if (found == "N") {
        HTML("<font size=5 color=red>Next word not found!</font>")
      } else {
        print(unname(top[1]), row.names = FALSE)
      }
    })

    # Alternative predictions.
    output$others <- renderPrint({
      if (found == "N") {
        HTML("<font size=5 color=red>Words not found!</font>")
      } else if (found == "B") {
        # Prediction came from the bigram itself: show its 2nd and 3rd candidates.
        print(unname(bigram[2]), row.names = FALSE)
        cat(",\n")
        print(unname(bigram[3]), row.names = FALSE)
      } else {
        # Prediction came from a higher-order table: show the bigram candidates
        # that differ from it. Missing candidates (no bigram row, or an NA
        # entry) are skipped instead of raising an error in the comparison.
        best <- as.character(top[1]$First[1])
        columns <- c("First", "Second", "Third")
        candidates <- lapply(seq_along(columns), function(i) bigram[i])
        keep <- vapply(
          seq_along(columns),
          function(i) {
            is_new_word(as.character(candidates[[i]][[columns[i]]][1]), best)
          },
          logical(1)
        )
        shown <- candidates[keep]

        if (length(shown) == 0) {
          HTML("<font size=5 color=red>Words not found!</font>")
        } else {
          for (i in seq_along(shown)) {
            # Separator only between two printed words (no dangling comma).
            if (i > 1) {
              cat(",\n")
            }
            print(unname(shown[[i]]), row.names = FALSE)
          }
        }
      }
    })
  })
}
# Run the app: build the Shiny application object from the `ui` and `server` defined above.
shinyApp(ui = ui, server = server)
# To launch it, save this file as App.R and, in the console, type: runApp("directory of the App.R file")
Categories
Bash
(3)
BOT
(2)
C#
(1)
Cluster Analysis
(1)
Data Cleaning
(6)
Data Ingestion
(2)
Data Science Specialization
(10)
Data Visualization
(15)
ggplot2
(1)
Hadoop
(1)
Hashnode
(3)
Machine Learning
(5)
MapReduce
(1)
Maps
(1)
Markdown
(7)
Market Basket Analysis
(1)
MATLAB
(1)
Matplotlib
(3)
Numpy
(2)
Octave
(1)
Pandas
(3)
Python
(17)
R
(22)
Regression
(7)
scikit-learn
(1)
Seaborn
(1)
Shell
(3)
Shiny App
(1)
SSIS
(3)
Statistical Inference
(2)
T-SQL
(8)
Unix
(3)
No comments:
Post a Comment