The Normality Transformation via Optimized Skewness and Kurtosis (OSKT) is a normality method that simultaneously evaluates deviations in skewness and kurtosis of non-normal data.
The latest version of the package osktnorm can be installed from CRAN
with the following command:
install.packages("osktnorm", repos="https://cloud.r-project.org", dep=TRUE)
If the package osktnorm has already been installed, load
it into the R working environment by using the following command:
library(osktnorm)
In the following code snippet, a right-skewed distribution with 300
observations is generated using rlnorm of R and then
normalized using OSKT. For this purpose, the osktfast
function is applied by simply passing the original observation vector to
be transformed, and the results are stored in the object
res_oskt.
set.seed(12) # Fix the RNG state so the example is reproducible
# Generate right-skewed data: 300 log-normal draws. The parameters are spelled
# out as meanlog/sdlog rather than relying on partial matching of mean/sd.
x_orig <- rlnorm(300, meanlog = 0, sdlog = 0.5)
# Apply the OSKT transformation and keep the transformed values
res_oskt <- osktfast(x_orig)
x_transformed <- res_oskt$transformed
head(x_transformed, 5)
[1] -1.802405 1.399280 -1.250515 -1.210836 -2.307431
# Pull the g, h and value components out of the osktfast result
g_star <- res_oskt$g
h_star <- res_oskt$h
A2 <- res_oskt$value
# Print each component under a descriptive label.
# NOTE(review): the labels treat g and h as the optimized skewness and
# kurtosis, and value as the Anderson-Darling statistic - confirm against
# the osktfast documentation.
cat("Optimized skewness: ", g_star, "\n")
Optimized skewness: -0.5909243
cat("Optimized kurtosis: ", h_star, "\n")
Optimized kurtosis: 0.07987881
cat("Anderson-Darling statistic at the optimum: ", A2, "\n")
Anderson-Darling statistic at the optimum: 0.1056021
The code snippet below visualizes the original and normalized observations using histograms and density plots for comparison purposes.
# Shared bin edges so that both histograms are drawn on the same grid
breaks <- pretty(range(c(x_orig, x_transformed)), n = 25)

# Binned and kernel density estimates, computed without plotting, so that the
# y-axis can be sized to fit every layer (including the normal curve's peak)
h_orig <- hist(x_orig, breaks = breaks, plot = FALSE)
h_trans <- hist(x_transformed, breaks = breaks, plot = FALSE)
d_orig <- density(x_orig)
d_trans <- density(x_transformed)
ymax <- max(h_orig$density, h_trans$density, d_orig$y, d_trans$y, dnorm(0))

# Layer 1: original observations (opens the plot and fixes the axes)
hist(
  x_orig,
  breaks = breaks, freq = FALSE, ylim = c(0, 1.05 * ymax),
  col = rgb(0.2, 0.4, 0.8, alpha = 0.4), border = "white",
  main = "Before and After OSKT Transformation", xlab = "Value"
)
lines(d_orig, col = "blue", lwd = 2)

# Layer 2: transformed observations, drawn on top of the existing plot
hist(
  x_transformed,
  breaks = breaks, freq = FALSE,
  col = rgb(0.8, 0.3, 0.3, alpha = 0.4), border = "white", add = TRUE
)
lines(d_trans, col = "red", lwd = 2)

# Layer 3: standard normal reference curve
curve(dnorm(x), add = TRUE, lwd = 2, lty = 2, col = "black")
legend("topleft",
legend = c("Original", "Transformed", "Original Density", "OSKT Density", "Standard Normal"),
col = c(rgb(0.2,0.4,0.8,0.6), rgb(0.8,0.3,0.3,0.6), "blue", "red", "black"),
lwd = c(10, 10, 2, 2, 2), lty = c(1, 1, 1, 1, 2), bty = "n")
Back-transformation can be performed using the
backosktfast function, which uses the Brent–Dekker
algorithm for efficiency.
# Mean and standard deviation of the original sample, handed to the
# back-transformation together with the g and h values obtained earlier
X_mean <- mean(x_orig)
X_sd <- sd(x_orig)

res_back <- backosktfast(
  Z = x_transformed,
  X_mean = X_mean,
  X_sd = X_sd,
  g = g_star,
  h = h_star,
  method = "brent"
)

# Recovered values are stored in the X_orig component of the result
x_recovered <- res_back$X_orig
head(x_recovered, n = 5)
[1] 0.4759235 2.2021046 0.6189750 0.6304848 0.3670768
breaks <- pretty(range(c(x_orig, x_transformed, x_recovered)), n = 30)
# Overlay the original, transformed and back-transformed samples on shared
# bins. The "Original" fill is rgb(0.2, 0.4, 0.8, 0.4) so that it matches the
# swatch shown for it in the legend and the colour used in the first figure.
hist(x_orig, breaks = breaks, freq = FALSE, col = rgb(0.2, 0.4, 0.8, 0.4),
     border = "white", main = "OSKT Transformation & Back Transformation",
     xlab = "Value")
# add = TRUE draws onto the plot opened by the first hist() call
hist(x_transformed, breaks = breaks, freq = FALSE,
     col = rgb(0.8, 0.3, 0.3, 0.4), border = "white", add = TRUE)
hist(x_recovered, breaks = breaks, freq = FALSE,
     col = rgb(0.2, 0.8, 0.2, 0.4), border = "white", add = TRUE)
legend("topleft", legend = c("Original","Transformed","Back-transformed"),
fill = c(rgb(0.2,0.4,0.8,0.4), rgb(0.8,0.3,0.3,0.4), rgb(0.2,0.8,0.2,0.4)))
Diagnostic metrics compare original and recovered values to ensure mathematical inversion accuracy.
# Keep only the positions where both the original and the recovered value
# are finite, so the summaries below are not affected by NA/NaN/Inf
ok <- is.finite(x_orig) & is.finite(x_recovered)
xo <- x_orig[ok]
xr <- x_recovered[ok]

# Element-wise recovery error and its summaries
err <- xr - xo
RMSE <- sqrt(mean(err^2))
MAE <- mean(abs(err))
COR <- cor(xo, xr)

# One-row data frame, printed transposed so each metric is on its own line
back_stats <- data.frame(
  RMSE = RMSE,
  MAE = MAE,
  Correlation = COR,
  R2 = COR^2
)
round(t(back_stats), digits = 8)
[,1]
RMSE 0.00089459
MAE 0.00069680
Correlation 1.00000000
R2 1.00000000
Below, we generate a skewed variable using ghdist and
compare OSKT with Box-Cox (BC) and Yeo-Johnson (YJ).
set.seed(12)

# Draw 300 values with ghdist (A = 0, B = 1, g = -0.49, h = 0)
x_orig <- groupcompare::ghdist(n = 300, A = 0, B = 1, g = -0.49, h = 0)

# Transform the same sample three ways, keeping only the transformed values
x_bc <- osktnorm::boxcox(x_orig, makepositive = TRUE)$transformed # Box-Cox
x_yj <- osktnorm::yeojohnson(x_orig)$transformed # Yeo-Johnson
x_oskt <- osktfast(x_orig)$transformed # OSKT
# Summarise the shape of a sample and how well it agrees with normality.
#
# x: numeric vector; non-finite entries are dropped before anything is computed.
#
# Returns a named numeric vector with elements
#   Skew - third central moment divided by sd^3
#   Kurt - fourth central moment divided by sd^4, minus 3
#   SW   - p-value of shapiro.test()
#   CVM  - p.value component of cvmtest()
#   PPM  - statistic component of pearsonp(), with its name removed
get_stats <- function(x) {
  vals <- x[is.finite(x)]
  centred <- vals - mean(vals)
  spread <- sd(vals)

  skew <- mean(centred^3) / spread^3
  excess_kurt <- mean(centred^4) / spread^4 - 3

  c(
    Skew = skew,
    Kurt = excess_kurt,
    SW = shapiro.test(vals)$p.value,
    CVM = cvmtest(vals)$p.value,
    PPM = unname(pearsonp(vals)$statistic)
  )
}
# One row of get_stats() output per version of the sample; the list names
# become the row names of the resulting matrix
pval_table <- do.call(
  rbind,
  lapply(list(ORG = x_orig, BC = x_bc, YJ = x_yj, OSKT = x_oskt), get_stats)
)
as.data.frame(round(pval_table, digits = 4))
Skew Kurt SW CVM PPM
ORG -1.8642 5.9759 0.0000 0.0001 9.24
BC -0.2397 -0.5635 0.0047 0.1551 0.82
YJ -0.0454 -0.3109 0.1257 0.3844 0.71
OSKT -0.2130 0.0118 0.0631 0.9100 0.56