--- title: "SQL_Lite_exerice" author: "Kathleen" date: "2/25/2019" output: pdf_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ## R Markdown This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: ```{r sqlite} library(RSQLite) file_name <- "RegistrationDB.sqlite" # open a connection to SQLite and create the RegistrationDB database if (file.exists(file_name)) file.remove(file_name) db <- dbConnect(SQLite(), dbname=file_name) # In SQLite foreign key constraints are disabled by default, so they must be enabled for # each database connection separately by turning pragma foreign_keys=on dbSendQuery(conn = db, "pragma foreign_keys=on;") ``` ```{r df_create} # Create StudentDF, a data frame of students and their information studentID <- c(12654, 13887, 17625, 18574, 19876) last_name <- c("Healey", "Adams", "King", "Smith", "Healey") first_name <- c("Linda", "John", "Steven", "James", "Alison") gender <- c("Female", "Male", "Male", "Male", "Female") first_enrolled <- c(2010, 2010, 2011, 2011, 2010) city <- c("Boston", "Oakland","Oakland","Oakland", "Boston") degree <- c("BS", "BA", "BS", "BS", "BFA") StudentDF <- data.frame(studentID, last_name, first_name, gender, first_enrolled, city, degree) StudentDF ``` ```{r enrollment_df} #Create EnrollmentDF, a data frame of student enrollment studentID <- c(17625, 13887, 19876, 17625, 13887, 12654, 18574, 12654, 13887, 19876, 17625) course <- c(rep("COMP101",3), rep("COMP102", 4), rep("DS2000", 4)) year <- c(2017, 2017, 2018, 2017, 2018, 2017, 2018, rep(2019, 4)) grade <- c("A", "B", "B", "F", "A", "C", "B","A", "B", "B", "B") EnrollmentDF <- data.frame(studentID, course, year, grade) head(EnrollmentDF) # Create the Enrollment table, specifying studentID and course as foreign keys. ``` ```{r} # Create the Student table, specifying studentID as the PRIMARY KEY # Since we are specifying a primary ID, # there is no need for the AUTOINCREMENT rowid that is # automatically added by SQLite. #Add WITHOUT ROWID to the end of the CREATE TABLE statement. dbSendQuery(conn = db, "CREATE TABLE Student ( studentID INTEGER PRIMARY KEY, last_name TEXT, first_name TEXT, gender TEXT, first_enrolled INTEGER, city TEXT, degree TEXT) WITHOUT ROWID") # insert the StudentDF data frame into the Student table in the RegistrationDB database # make sure you set row.names=FALSE or else you will get an extra column dbWriteTable(conn = db, name = "Student", value = StudentDF, row.names=FALSE, append = TRUE) # check that the Student table was added correctly dbListTables(db) dbReadTable(db, "Student") ``` ```{r create_course} # Create CourseDF, a data frame of the courses and the instructor courseID <- c("COMP101", "COMP102", "COMP303", "DS2000") instructor <- c(1001, 1018, "", 1018) CourseDF <- data.frame(courseID, instructor) CourseDF1 <- tibble(courseID, instructor) # Create Course Table, where the courseID is the primary key. # Add the WITHOUT ROWID statement. dbSendQuery(conn = db, "CREATE TABLE Course ( courseID TEXT PRIMARY KEY, instructor INTEGER) WITHOUT ROWID") # insert CourseDF data into Course Table dbWriteTable(conn = db, name = "Course", value = CourseDF, row.names=FALSE, append=TRUE) # check that the Course table was added correctly dbListTables(db) dbReadTable(db, "Course") ``` ```{r} # In this table there is no column that can be used as a primary ID, so we will have to # use and autoincremented ROWID as the primary key. Since SQLite does this automatically, # we don't have to add any extra statements. Just make sure that you DO NOT include the # WITHOUT ROWID optimization in the CREATE TABLE statement. dbSendQuery(conn = db, "CREATE TABLE Enrollment ( studentID INTEGER, course TEXT, year INTEGER, grade TEXT, FOREIGN KEY(studentID) REFERENCES Student(studentID) FOREIGN KEY(course) REFERENCES Course(courseID))") # insert EnrollmentDF data into the Enrollment table. dbWriteTable(conn = db, name = "Enrollment", value = EnrollmentDF, row.names=FALSE, append=TRUE) # Check that Enrollment table was added properly dbListTables(db) dbReadTable(db, "Enrollment") ``` A Note about the ROWIDs. We enabled the WITHOUT ROWID optimization for the Student and the Course tables since both of these tables had columns that could be intuitively used as a unique identifier for information in that table. For the Student table the primary key is the studentID, and for the Course table the primary key is the courseID. Since the Enrollment table did not have such a column, we used the ROWID as the primary key. # ```{r} # Enrollment contains a ROWID that you need to explicitly ask for dbGetQuery(db, "SELECT ROWID, * FROM Enrollment WHERE grade = 'A'") ``` ```{r} # Retrieve Student information where last_name is Smith #returns an error since the Student table fors not have a ROWID # dbGetQuery(db, "SELECT ROWID, * FROM Student WHERE last_name = 'Smith'") dbGetQuery(db, "SELECT * FROM Student WHERE last_name = 'Smith'") ``` ```{r} # Find information on the course COMP101 dbGetQuery(db, "SELECT * FROM Course WHERE courseID = 'COMP101'") ``` # Exercises # For each problem write a SQL SELECT as well as a dplyr expression # 1. Determine the number of students in the data base ```{r} dbGetQuery(db, "SELECT COUNT(*) FROM Student") ``` # 2. Determine the number of female students ```{r} dbGetQuery(db, "SELECT COUNT(*) FROM Student WHERE gender = 'Female'") ``` # 3. Determine the most common grade in the enrollment table ```{r} dbGetQuery(db, "SELECT grade, COUNT(*) FROM Enrollment GROUP BY grade ORDER BY COUNT(*) DESC LIMIT 1") ``` # 4. What students has taken a course more than once? ```{r} dbGetQuery(db, "SELECT StudentID, course, COUNT(*) FROM Enrollment GROUP BY studentID, course HAVING COUNT(*) > 1") ``` #5 Create a result that for each row reports a grade for a student. The result should contain the student information, the name of the course, and the grade earned. Order the result by StudentID ```{r} dbGetQuery(db, "SELECT Student.StudentID, last_name, first_name, course, grade FROM Enrollment JOIN Student ON Enrollment.StudentID = Student.StudentID ORDER BY Student.StudentID") ``` #6 Create a result that for each row reports a grade for a student. The result should contain the student information, the name of the course, the instructor and the grade earned. Order the result by StudentID ```{r} dbGetQuery(db, "SELECT Student.StudentID, last_name, first_name, course, grade, instructor FROM Enrollment JOIN Student ON Enrollment.StudentID = Student.StudentID JOIN Course on Course.CourseID = Enrollment.Course ORDER BY Student.StudentID") ``` #7 Determine the number of letter grades given by each instructor ```{r} dbGetQuery(db, "SELECT grade, instructor, COUNT(*) FROM Enrollment JOIN Course ON Enrollment.course = Course.courseID GROUP BY grade, instructor ORDER BY instructor, grade DESC") ``` ```{r} dbDisconnect(db) ```