Skip to main content
R Programming
CHAPTER 30 Beginner

Final Projects and Real-World Applications

Updated: May 18, 2026
5 min read

# CHAPTER 30

Final Projects and Real-World Applications

1. Chapter Introduction

This final chapter brings together concepts from across the course in three end-to-end R projects built on simulated data. Each project is a standalone portfolio piece demonstrating end-to-end data science competency.

---

Project 1: Financial Analytics Dashboard

r
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
library(quantmod); library(dplyr); library(ggplot2); library(lubridate)

set.seed(42)

# Simulate a multi-stock portfolio ----
# Every ticker gets its own random-walk price path over the same weekday
# calendar (market holidays are not modelled).
stocks <- c("AAPL", "MSFT", "GOOGL", "AMZN", "TSLA")
dates <- seq(as.Date("2023-01-01"), as.Date("2024-12-31"), by = "day")

# Keep Monday-Friday only. "%u" is the ISO weekday number (1 = Monday,
# 7 = Sunday); unlike weekdays(), it does not depend on the day names of the
# session locale, so weekends are removed on non-English systems too.
trading_days <- dates[as.integer(format(dates, "%u")) <= 5L]

portfolio <- lapply(stocks, function(ticker) {
  start_price <- runif(1, 100, 500)
  n <- length(trading_days)
  returns <- rnorm(n, 0.0005, 0.02)
  # Day 1 trades at start_price and every later day compounds the previous
  # simulated return, so the cumulative product has n + 1 entries and only
  # the first n are kept.
  prices <- cumprod(c(start_price, 1 + returns))
  data.frame(
    ticker = ticker,
    date = trading_days,
    price = round(prices[seq_len(n)], 2)
  )
})
portfolio_df <- bind_rows(portfolio)

# Calculate returns and rolling metrics ----
# All columns are computed per ticker; arrange(date) guarantees that rows
# within each ticker are in chronological order before lag()/first() run.
portfolio_df <- portfolio_df %>%
  group_by(ticker) %>%
  arrange(date) %>%
  mutate(
    # dplyr:: is spelled out because quantmod attaches xts, which also
    # provides first()/last() and lag() methods.
    daily_return = price / dplyr::lag(price) - 1,
    cum_return = price / dplyr::first(price) - 1,
    rolling_30d = zoo::rollmean(price, 30, fill = NA, align = "right"),
    # rollapply() centres its window by default, which would mix future
    # returns into each day's volatility; align = "right" makes it a
    # trailing 30-day window, consistent with rolling_30d.
    rolling_vol = zoo::rollapply(
      daily_return, 30, sd,
      fill = NA, align = "right"
    )
  ) %>%
  ungroup()

# Portfolio analytics ----
# One summary row per ticker, best Sharpe ratio first. Rows are expected to
# be in chronological order within each ticker (first()/last()/cummax()
# depend on it).
cat("=== PORTFOLIO ANALYTICS ===\n\n")
metrics <- portfolio_df %>%
  # The first day of each ticker (daily_return is NA) is kept on purpose:
  # dropping it would start total_return from day 2 and disagree with the
  # cum_return column. The return statistics below skip the NA via na.rm.
  group_by(ticker) %>%
  summarise(
    total_return = round(
      (dplyr::last(price) / dplyr::first(price) - 1) * 100, 1
    ),
    # n() prices span n() - 1 return periods; 252 trading days per year.
    ann_return = round(
      ((dplyr::last(price) / dplyr::first(price))^(252 / (n() - 1)) - 1) * 100,
      1
    ),
    volatility = round(sd(daily_return, na.rm = TRUE) * sqrt(252) * 100, 1),
    # Annualised Sharpe ratio with the risk-free rate taken as zero.
    sharpe_ratio = round(
      mean(daily_return, na.rm = TRUE) /
        sd(daily_return, na.rm = TRUE) * sqrt(252),
      2
    ),
    # Maximum drawdown: the deepest percentage fall from the running peak
    # price (min of cum_return would only measure the fall below day 1).
    max_drawdown = round(min(price / cummax(price) - 1) * 100, 1),
    .groups = "drop"
  ) %>%
  arrange(desc(sharpe_ratio))

print(metrics)

# Visualization: cumulative returns ----
# One line per ticker showing the percentage gain or loss relative to that
# ticker's first price.
ggplot(portfolio_df, aes(x = date, y = cum_return * 100, color = ticker)) +
  # linewidth replaces the size aesthetic for lines (size is deprecated for
  # line geoms since ggplot2 3.4.0 and triggers a warning).
  geom_line(linewidth = 1.2, alpha = 0.8) +
  scale_color_brewer(palette = "Set2") +
  labs(
    title = "Portfolio Cumulative Returns (%)",
    subtitle = "2023-2024 Performance Comparison",
    x = NULL,
    y = "Cumulative Return (%)",
    color = "Stock"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold")
  )

---

Project 2: Healthcare Data Analysis

r
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
library(dplyr); library(ggplot2); library(tidyr)

set.seed(42)
n <- 1000

# Simulated patient cohort ----
# One row per patient with demographics, vitals, lab values, smoking status
# and a 0/1 diabetes flag, plus derived BMI and blood-pressure categories.
patients <- data.frame(
  patient_id = paste0("P", sprintf("%04d", 1:n)),
  age = sample(18:90, n, replace = TRUE),
  gender = sample(c("Male", "Female"), n, replace = TRUE),
  bmi = round(rnorm(n, 27, 5), 1),
  blood_pressure = round(rnorm(n, 120, 20)),
  cholesterol = round(rnorm(n, 200, 35)),
  glucose = round(rnorm(n, 100, 25)),
  smoking = sample(
    c("Never", "Former", "Current"), n,
    replace = TRUE, prob = c(0.5, 0.25, 0.25)
  ),
  diabetes = sample(c(0, 1), n, replace = TRUE, prob = c(0.85, 0.15))
) %>%
  mutate(
    # Explicit level order makes "Never" the reference category in models;
    # a character column would default to the alphabetical first ("Current").
    smoking = factor(smoking, levels = c("Never", "Former", "Current")),
    # right = FALSE gives left-closed bands [0, 18.5), [18.5, 25), [25, 30),
    # [30, Inf), so a BMI of exactly 25 is "Overweight" and 30 is "Obese".
    bmi_category = cut(
      bmi,
      breaks = c(0, 18.5, 25, 30, Inf),
      labels = c("Underweight", "Normal", "Overweight", "Obese"),
      right = FALSE
    ),
    # Banding on the single blood_pressure reading; the first matching
    # condition wins. Missing readings stay missing.
    bp_category = case_when(
      is.na(blood_pressure) ~ NA_character_,
      blood_pressure < 120 ~ "Normal",
      blood_pressure < 130 ~ "Elevated",
      blood_pressure < 140 ~ "Stage 1",
      TRUE ~ "Stage 2"
    )
  )

# Risk factor analysis ----
# Two tables: percentage of patients flagged as diabetic, and group size,
# first per BMI category and then per smoking status.
cat("=== HEALTHCARE ANALYTICS REPORT ===\n\n")

cat("Diabetes Prevalence by BMI Category:\n")
print(
  summarise(
    group_by(patients, bmi_category),
    prevalence = round(100 * mean(diabetes), 1),
    n = n(),
    .groups = "drop"
  )
)

cat("\nDiabetes Prevalence by Smoking Status:\n")
print(
  summarise(
    group_by(patients, smoking),
    prevalence = round(100 * mean(diabetes), 1),
    n = n(),
    .groups = "drop"
  )
)

# Logistic regression: diabetes risk model ----
# Binomial GLM of the diabetes flag on age, BMI, blood pressure, cholesterol,
# glucose and smoking status. Coefficients are exponentiated before printing
# so they read as odds ratios.
logit_model <- glm(
  formula = diabetes ~ age + bmi + blood_pressure + cholesterol + glucose +
    smoking,
  family = binomial,
  data = patients
)

cat("\nLogistic Regression Coefficients:\n")
logit_model %>%
  coef() %>%
  exp() %>%
  round(3) %>%
  print()
cat("(Values > 1 increase diabetes risk)\n")

# Predict risk ----
# Store each patient's fitted probability from the model, then list those
# above the 0.3 cut-off, highest risk first.
patients[["risk_score"]] <- predict(logit_model, type = "response")
high_risk <- arrange(
  filter(patients, risk_score > 0.3),
  desc(risk_score)
)
cat(
  sprintf(
    "\nHigh-risk patients identified: %d (%.1f%%)\n",
    nrow(high_risk),
    nrow(high_risk) / nrow(patients) * 100
  )
)

---

Project 3: HR Analytics Platform

r
1234567891011121314151617181920212223242526272829303132333435363738394041424344
library(dplyr); library(ggplot2); library(caret)

set.seed(42)
n <- 1000

# Simulated employee records ----
# One row per employee. Columns are drawn in the order listed, so the random
# stream (and therefore the data) is reproducible from the seed above.
hr_data <- data.frame(
  emp_id = seq_len(n),
  age = sample(22:60, size = n, replace = TRUE),
  dept = sample(
    c("Engineering", "Sales", "HR", "Finance", "Marketing"),
    size = n, replace = TRUE
  ),
  salary = round(runif(n, min = 35000, max = 120000), digits = -3),
  satisfaction = round(runif(n, min = 1, max = 10), digits = 1),
  tenure = sample(1:20, size = n, replace = TRUE),
  performance = round(rnorm(n, mean = 3, sd = 0.8)),
  overtime = sample(
    c("Yes", "No"),
    size = n, replace = TRUE, prob = c(0.3, 0.7)
  ),
  attrition = sample(
    c("Yes", "No"),
    size = n, replace = TRUE, prob = c(0.16, 0.84)
  )
)

# Clamp the rounded performance scores to the 1-5 scale, then store both the
# rating and the attrition label as factors.
hr_data[["performance"]] <- factor(pmin(pmax(hr_data[["performance"]], 1), 5))
hr_data[["attrition"]] <- factor(hr_data[["attrition"]])

# Attrition analysis ----
# Prints the overall share of leavers, then the attrition rate (percent) and
# head count per department and per satisfaction quartile.
cat("=== HR ANALYTICS PLATFORM ===\n\n")
cat(
  "Overall Attrition Rate:",
  round(100 * mean(hr_data$attrition == "Yes"), 1),
  "%\n\n"
)

cat("Attrition by Department:\n")
print(
  hr_data %>%
    group_by(dept) %>%
    summarise(
      rate = round(100 * mean(attrition == "Yes"), 1),
      n = n(),
      .groups = "drop"
    ) %>%
    arrange(desc(rate))
)

cat("\nAttrition by Satisfaction Quartile:\n")
print(
  hr_data %>%
    mutate(
      # Quartile bins from the empirical distribution; include.lowest keeps
      # the minimum satisfaction score inside the first bin.
      sat_q = cut(
        satisfaction,
        breaks = quantile(satisfaction, seq(0, 1, by = 0.25)),
        labels = c("Low", "Med-Low", "Med-High", "High"),
        include.lowest = TRUE
      )
    ) %>%
    group_by(sat_q) %>%
    summarise(
      rate = round(100 * mean(attrition == "Yes"), 1),
      n = n(),
      .groups = "drop"
    )
)

# Predictive model: attrition risk ----
# 80/20 split on the attrition label, a random forest tuned with 5-fold
# cross-validation on the training part, then accuracy and F1 (with "Yes" as
# the positive class) on the held-out part.
train_idx <- createDataPartition(hr_data$attrition, p = 0.8, list = FALSE)
train <- hr_data[train_idx, ]
test <- hr_data[-train_idx, ]

ctrl <- trainControl(method = "cv", number = 5, classProbs = TRUE)
model <- train(
  attrition ~ age + dept + salary + satisfaction + tenure + performance +
    overtime,
  data = train,
  method = "rf",
  trControl = ctrl
)

pred <- predict(model, newdata = test)
cm <- confusionMatrix(data = pred, reference = test$attrition, positive = "Yes")
cat(
  sprintf(
    "\nAttrition Model: Acc=%.3f, F1=%.3f\n",
    cm$overall["Accuracy"],
    cm$byClass["F1"]
  )
)

---

Course Complete! 🎉

text
1234567891011121314151617181920212223242526272829303132333435363738
R PROGRAMMING COURSE — COMPLETE SUMMARY

Foundations (Ch 1-10):
✅ Introduction, installation, RStudio
✅ Syntax, variables, data types, operators
✅ Control flow, functions, vectors
✅ Matrices, lists, data frames

Data Handling (Ch 11-15):
✅ Strings (stringr), file handling
✅ Data import (readr, readxl, JSON, SQL)
✅ Data cleaning (tidyr, dplyr)
✅ Data manipulation (dplyr pipeline)

Visualization & Statistics (Ch 16-20):
✅ ggplot2 Grammar of Graphics
✅ Descriptive statistics, distributions
✅ Hypothesis testing (t-test, ANOVA, chi-squared)
✅ Correlation and regression

Advanced Analytics (Ch 21-27):
✅ Time series and forecasting
✅ Exploratory Data Analysis (EDA)
✅ Machine learning with caret
✅ Classification and clustering
✅ Real-world datasets, Shiny dashboards
✅ Functional programming, performance

Your next steps:
→ R Markdown for reproducible reports
→ Tidymodels for modern ML workflows
→ Bayesian statistics with Stan/brms
→ Bioconductor for genomics
→ Plotly/Crosstalk for interactive viz
→ Deploy Shiny apps to shinyapps.io

Congratulations on completing
R Programming for Beginners to Advanced!

Finish this Chapter

Save your progress on your learning path and prepare for coding interview challenges.

Discussion

Join the discussion

Log in or create a free account to participate.

Sort: ·