%jars /home/vishnuaa77/vscode/vishnu/lib/jfreechart-1.5.4.jar
%jars /home/vishnuaa77/vscode/vishnu/lib/jcommon-1.0.24.jar
%jars /home/vishnuaa77/vscode/vishnu/lib/commons-math3-3.6.1.jar

import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;

import java.io.File;

/**
 * Renders paired x/y samples as a JFreeChart scatter plot and writes the result to disk.
 */
public class DataVisualization {

    /**
     * Builds a scatter plot from the given points and saves it as an 800x600 PNG.
     *
     * <p>Failures while writing the image are reported on standard error (message plus
     * stack trace) rather than propagated to the caller.
     *
     * @param xData      x coordinates of the points
     * @param yData      y coordinates of the points; must have the same length as {@code xData}
     * @param title      chart title, also used as the series (legend) name
     * @param xAxisLabel label for the horizontal axis
     * @param yAxisLabel label for the vertical axis
     * @param filePath   destination of the PNG file; missing parent directories are created
     * @throws IllegalArgumentException if {@code xData} and {@code yData} differ in length
     */
    public static void generateAndSaveChart(double[] xData, double[] yData, String title, String xAxisLabel, String yAxisLabel, String filePath) {
        // Fail fast with a clear message instead of an ArrayIndexOutOfBoundsException
        // (yData shorter) or silently dropped values (yData longer).
        if (xData.length != yData.length) {
            throw new IllegalArgumentException(
                    "xData and yData must have the same length, got " + xData.length + " and " + yData.length);
        }

        XYSeries series = new XYSeries(title);
        for (int i = 0; i < xData.length; i++) {
            series.add(xData[i], yData[i]);
        }
        XYSeriesCollection dataset = new XYSeriesCollection(series);
        JFreeChart chart = ChartFactory.createScatterPlot(
                title,
                xAxisLabel,
                yAxisLabel,
                dataset,
                PlotOrientation.VERTICAL,
                true,   // legend
                true,   // tooltips
                false   // URLs
        );

        try {
            File target = new File(filePath);
            // Writing into a directory that does not exist fails with FileNotFoundException,
            // so make sure the destination folder is there first.
            File parent = target.getAbsoluteFile().getParentFile();
            if (parent != null) {
                parent.mkdirs();
            }
            ChartUtils.saveChartAsPNG(target, chart, 800, 600);
            System.out.println("Chart has been saved as " + filePath);
        } catch (Exception e) {
            System.err.println("Problem occurred creating chart.");
            // Surface the cause; the bare message alone gives no hint about what went wrong.
            e.printStackTrace();
        }
    }
}

The DataVisualization class is used to create and save visual representations of data, specifically scatter plots, using the JFreeChart library. The class method generateAndSaveChart takes arrays of x and y data points, a title, labels for the x and y axes, and a file path as parameters. It creates a data series from the x and y arrays, adds it to a dataset, and then generates a scatter plot with the specified title and axis labels. This plot is oriented vertically and includes options to show legends and tooltips, but without URLs. Finally, the chart is saved as a PNG file to the given file path, and the method prints out a confirmation message. If an error occurs during this process, it prints out an error message instead.

/**
 * Produces synthetic student activity metrics and the grades derived from them.
 *
 * <p>Each feature row holds, in order: commits, pull requests, issues and repositories
 * contributed to.
 */
public class MockDataGenerator {

    private static final Random random = new Random();

    /** Number of activity metrics stored per student. */
    private static final int FEATURE_COUNT = 4;

    /** Score given to an activity whose count is at or below its baseline. */
    private static final double BASE_GRADE = 60;

    /** Upper bound applied to every per-activity score. */
    private static final double MAX_GRADE = 100;

    /** Multiplier of the logarithmic term added on top of {@link #BASE_GRADE}. */
    private static final double BONUS_SCALE = 10;

    // Baselines: the lowest value generateXData can produce for each metric.
    private static final int COMMIT_BASELINE = 30;
    private static final int PULL_REQUEST_BASELINE = 10;
    private static final int ISSUE_BASELINE = 5;
    private static final int REPO_BASELINE = 2;

    /**
     * Generates one row of random activity counts per student.
     *
     * @param numStudents number of rows to generate
     * @return a {@code numStudents x 4} matrix with commits in [30, 150), pull requests in
     *         [10, 60), issues in [5, 45) and repositories in [2, 22), all drawn independently
     */
    public static double[][] generateXData(int numStudents) {
        double[][] xData = new double[numStudents][FEATURE_COUNT];

        for (double[] row : xData) {
            row[0] = COMMIT_BASELINE + random.nextInt(120);
            row[1] = PULL_REQUEST_BASELINE + random.nextInt(50);
            row[2] = ISSUE_BASELINE + random.nextInt(40);
            row[3] = REPO_BASELINE + random.nextInt(20);
        }

        return xData;
    }

    /**
     * Computes the grade for every feature row.
     *
     * @param xData rows laid out as produced by {@link #generateXData(int)}; each value is
     *              truncated to an {@code int} before scoring
     * @return one grade per row, in row order
     */
    public static double[] generateYData(double[][] xData) {
        double[] yData = new double[xData.length];

        for (int i = 0; i < xData.length; i++) {
            double[] row = xData[i];
            yData[i] = calculateGrade((int) row[0], (int) row[1], (int) row[2], (int) row[3]);
        }

        return yData;
    }

    /**
     * Combines the four per-activity scores into one grade: commits weigh 40%, the other
     * three metrics 20% each.
     */
    private static double calculateGrade(int commits, int pullRequests, int issues, int reposContributed) {
        double commitGrade = activityGrade(commits, COMMIT_BASELINE);
        double pullRequestGrade = activityGrade(pullRequests, PULL_REQUEST_BASELINE);
        double issueGrade = activityGrade(issues, ISSUE_BASELINE);
        double repoGrade = activityGrade(reposContributed, REPO_BASELINE);

        return 0.4 * commitGrade + 0.2 * pullRequestGrade + 0.2 * issueGrade + 0.2 * repoGrade;
    }

    /**
     * Scores a single activity count against its baseline.
     *
     * <p>Counts at or below the baseline score {@link #BASE_GRADE}. Above it the score is
     * {@code 60 + 10 * (1 - 1 / ln(count - baseline + 1))}, capped at {@link #MAX_GRADE}.
     * NOTE(review): for a count just above the baseline the logarithm is smaller than 1, so
     * the score dips below 60 (about 55.6 at baseline + 1) before it rises again; this is
     * kept as-is so previously generated data stays reproducible.
     */
    private static double activityGrade(int count, int baseline) {
        if (count <= baseline) {
            return BASE_GRADE;
        }
        // The offset keeps the logarithm's argument at 2 or more, so ln() is never zero here.
        int aboveBaseline = count - (baseline - 1);
        return Math.min(MAX_GRADE, BASE_GRADE + (BONUS_SCALE * (1 - 1 / Math.log(aboveBaseline))));
    }
}

The MockDataGenerator class is used for creating simulated data sets that represent student engagement in software development activities and their corresponding grades. It generates random values for the number of commits, pull requests, issues, and repositories contributed to for each student. These values are then used to calculate a grade based on a logarithmic scale, with different weights assigned to each activity type. This mock data could be used for training a machine learning model to predict student grades from their coding activity metrics.

import java.awt.Color;
import java.awt.image.BufferedImage;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import javax.imageio.ImageIO;

import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.ArrayRealVector;
import org.apache.commons.math3.linear.LUDecomposition;
import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.RealVector;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.chart.renderer.xy.XYLineAndShapeRenderer;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;

/**
 * Fits a ridge-regularized linear regression of mock student grades on four activity
 * metrics (data from {@code MockDataGenerator}), evaluates it on a hold-out split and
 * renders a "grades vs. metric" chart.
 */
public class MultiVarAnalyticsGradeRegression {

    /** Share of the rows used for training in {@link #performCrossValidation(int)}. */
    private static final double TRAIN_FRACTION = 0.8;

    /** Rows with any predictor below this quantile are excluded from the fit. */
    private static final double LOWER_QUANTILE = 0.15;

    /** Rows with any predictor above this quantile are excluded from the fit. */
    private static final double UPPER_QUANTILE = 0.85;

    /** Ridge penalty added to the diagonal of X'X. */
    private static final double RIDGE_LAMBDA = 0.01;

    /** Number of segments used to draw the regression line. */
    private static final int REGRESSION_LINE_SEGMENTS = 100;

    /** Folder the chart images are written to. */
    private static final String CHART_OUTPUT_DIR = "src/main/resources/static/images/";

    /**
     * Generates mock data, trains on the first 80% of the rows and prints the mean squared
     * error of the predictions for the remaining 20%.
     *
     * <p>Despite the name this is a single hold-out split, not k-fold cross-validation.
     * NOTE(review): the features are normalized over all rows before the split, so the
     * test rows influence the mean/standard deviation used for training.
     *
     * @param numStudents number of mock students to generate
     */
    public void performCrossValidation(int numStudents) {
        double[][] xData = MockDataGenerator.generateXData(numStudents);
        double[][] normalizedXData = normalize(xData);
        double[] yData = MockDataGenerator.generateYData(xData);

        // Split the data into training and testing sets (80-20 split)
        int trainSize = (int) (TRAIN_FRACTION * numStudents);
        double[][] trainX = Arrays.copyOfRange(normalizedXData, 0, trainSize);
        double[][] testX = Arrays.copyOfRange(normalizedXData, trainSize, numStudents);
        double[] trainY = Arrays.copyOfRange(yData, 0, trainSize);
        double[] testY = Arrays.copyOfRange(yData, trainSize, numStudents);

        // Train the regression model on the training set
        double[] coefficients = calculateCoefficients(trainX, trainY);

        // Predict the grades for the testing set
        double[] predictedY = new double[testY.length];
        for (int i = 0; i < testX.length; i++) {
            predictedY[i] = predict(coefficients, testX[i]);
        }

        // Calculate the Mean Squared Error (MSE) for the testing set
        double mse = 0;
        for (int i = 0; i < testY.length; i++) {
            mse += Math.pow(testY[i] - predictedY[i], 2);
        }
        mse /= testY.length;

        System.out.println("Mean Squared Error on Testing Set: " + mse);
    }

    /** Evaluates the linear model: {@code coefficients[0]} is the bias, the rest are weights. */
    private static double predict(double[] coefficients, double[] features) {
        double prediction = coefficients[0];
        for (int j = 0; j < features.length; j++) {
            prediction += coefficients[j + 1] * features[j];
        }
        return prediction;
    }

    /**
     * Runs the regression on 1000 mock students and saves the "Grades vs Commits" chart.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        MultiVarAnalyticsGradeRegression regression = new MultiVarAnalyticsGradeRegression();
        int numStudents = 1000;

        // Perform regression and get the result
        RegressionResult result = regression.performRegression(numStudents);

        // The result holds the normalized features, i.e. the scale the coefficients refer to,
        // so the commits column below is expressed as z-scores.
        double[] commitsData = getColumn(result.getXData(), 0); // first column is commits
        double[] gradesData = result.getYData(); // Actual grade data
        double[] coefficients = result.getCoefficients(); // Coefficients from regression

        // Display the "Grades vs. Commits" chart
        displayChart(commitsData, gradesData, coefficients, "Commits");
    }

    /**
     * Standardizes every column to zero mean and unit (population) standard deviation.
     *
     * @param xData matrix whose rows are samples and whose columns are features
     * @return a new matrix of z-scores; a column without any spread becomes all zeros,
     *         and an empty input yields an empty matrix
     */
    public static double[][] normalize(double[][] xData) {
        int n = xData.length;
        if (n == 0) {
            return new double[0][0];
        }
        int m = xData[0].length;
        double[][] normalizedData = new double[n][m];

        for (int j = 0; j < m; j++) {
            double[] column = getColumn(xData, j);
            double mean = Arrays.stream(column).average().orElse(0);
            double stddev = Math.sqrt(Arrays.stream(column).map(val -> Math.pow(val - mean, 2)).average().orElse(0));

            for (int i = 0; i < n; i++) {
                // A constant column has stddev 0; dividing would fill the column with NaN.
                normalizedData[i][j] = stddev == 0 ? 0 : (xData[i][j] - mean) / stddev;
            }
        }

        return normalizedData;
    }

    /**
     * Generates mock data and fits the regression on the normalized features.
     *
     * @param numStudents number of mock students to generate
     * @return the normalized features, the grades and the fitted coefficients
     */
    public RegressionResult performRegression(int numStudents) {
        double[][] xData = MockDataGenerator.generateXData(numStudents);
        double[][] normalizedXData = normalize(xData);  // Normalize the xData
        double[] yData = MockDataGenerator.generateYData(xData);

        double[] coefficients = calculateCoefficients(normalizedXData, yData);  // Use normalized xData for regression

        RegressionResult result = new RegressionResult();
        result.setCoefficients(coefficients);
        // Store the normalized features: the coefficients are only meaningful on that scale,
        // and callers combine both (e.g. to draw the regression line).
        result.setXData(normalizedXData);
        result.setYData(yData);

        return result;
    }

    /** Holder for the feature matrix, the target grades and the fitted coefficients. */
    public static class RegressionResult {
        private double[][] xData;
        private double[] yData;
        private double[] coefficients;

        /** @return the feature matrix stored in this result */
        public double[][] getXData() {
            return xData;
        }

        /** @param xData the feature matrix to store */
        public void setXData(double[][] xData) {
            this.xData = xData;
        }

        /** @return the target values stored in this result */
        public double[] getYData() {
            return yData;
        }

        /** @param yData the target values to store */
        public void setYData(double[] yData) {
            this.yData = yData;
        }

        /** @return the regression coefficients, bias term first */
        public double[] getCoefficients() {
            return coefficients;
        }

        /** @param coefficients the regression coefficients to store, bias term first */
        public void setCoefficients(double[] coefficients) {
            this.coefficients = coefficients;
        }
    }

    /**
     * Copies one column out of a row-major matrix.
     *
     * @param matrix      source matrix
     * @param columnIndex index of the column to extract
     * @return a new array holding the column's values in row order
     */
    public static double[] getColumn(double[][] matrix, int columnIndex) {
        double[] column = new double[matrix.length];
        for (int i = 0; i < matrix.length; i++) {
            column[i] = matrix[i][columnIndex];
        }
        return column;
    }

    /**
     * Fits a ridge regression after trimming extreme rows.
     *
     * <p>Only rows whose every predictor lies between the 15th and 85th percentile of its
     * column (inclusive) take part in the fit. The coefficients solve
     * {@code (X'X + lambda*I) b = X'y} with {@code lambda = 0.01}; the penalty is applied to
     * the bias term as well.
     *
     * @param xData feature matrix, one row per sample
     * @param yData target value for each row of {@code xData}
     * @return the coefficients, bias first, followed by one weight per feature column
     * @throws IllegalArgumentException if {@code xData} is empty, the row counts differ, or
     *                                  no row survives the percentile filter
     */
    public static double[] calculateCoefficients(double[][] xData, double[] yData) {
        int n = xData.length;
        if (n == 0) {
            throw new IllegalArgumentException("xData must contain at least one row");
        }
        if (yData.length != n) {
            throw new IllegalArgumentException(
                    "xData and yData must have the same number of rows, got " + n + " and " + yData.length);
        }
        int m = xData[0].length;

        // Calculate the 15th and 85th percentiles for each predictor
        double[] lowerBounds = new double[m];
        double[] upperBounds = new double[m];
        for (int j = 0; j < m; j++) {
            double[] column = getColumn(xData, j);
            lowerBounds[j] = calculateQuantile(column, LOWER_QUANTILE);
            upperBounds[j] = calculateQuantile(column, UPPER_QUANTILE);
        }

        // Keep only the rows that lie inside the percentile band for all predictors
        List<double[]> filteredX = new ArrayList<>();
        List<Double> filteredY = new ArrayList<>();
        for (int i = 0; i < n; i++) {
            if (isWithinBounds(xData[i], lowerBounds, upperBounds)) {
                filteredX.add(xData[i]);
                filteredY.add(yData[i]);
            }
        }
        if (filteredX.isEmpty()) {
            // Without this check the matrix constructor fails with an opaque dimension error.
            throw new IllegalArgumentException(
                    "No rows fall between the 15th and 85th percentiles of every predictor");
        }

        // Build the design matrix (leading column of ones for the bias) and target vector
        int rows = filteredX.size();
        RealMatrix X = new Array2DRowRealMatrix(rows, m + 1);
        double[] targets = new double[rows];
        for (int i = 0; i < rows; i++) {
            double[] row = filteredX.get(i);
            X.setEntry(i, 0, 1);  // Bias term
            for (int j = 0; j < m; j++) {
                X.setEntry(i, j + 1, row[j]);
            }
            targets[i] = filteredY.get(i);
        }
        RealVector Y = new ArrayRealVector(targets, false);

        RealMatrix Xt = X.transpose();
        RealMatrix identity = MatrixUtils.createRealIdentityMatrix(m + 1);
        RealMatrix regularizedXtX = Xt.multiply(X).add(identity.scalarMultiply(RIDGE_LAMBDA));
        RealVector XtY = Xt.operate(Y);

        // Solve the normal equations directly rather than forming the inverse first.
        RealVector B = new LUDecomposition(regularizedXtX).getSolver().solve(XtY);

        return B.toArray();
    }

    /** Returns true when every value of the row lies inside its column's inclusive bounds. */
    private static boolean isWithinBounds(double[] row, double[] lowerBounds, double[] upperBounds) {
        for (int j = 0; j < lowerBounds.length; j++) {
            if (row[j] < lowerBounds[j] || row[j] > upperBounds[j]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Draws the samples as a scatter plot with the line
     * {@code coefficients[0] + coefficients[1] * x} on top and saves it as
     * {@code src/main/resources/static/images/<metricName>.png}.
     *
     * <p>Failures while writing the image are reported on standard error rather than thrown.
     *
     * @param xData        x values of the samples; must be on the scale the coefficients were
     *                     fitted on
     * @param yData        y values (grades) of the samples
     * @param coefficients regression coefficients; only the bias and the first weight are used
     * @param metricName   name of the plotted metric, used for the title, the axis label and
     *                     the file name
     */
    public static void displayChart(double[] xData, double[] yData, double[] coefficients, String metricName) {
        XYSeries series = new XYSeries("Students");
        XYSeries regressionLine = new XYSeries("Regression Line");

        for (int i = 0; i < xData.length; i++) {
            series.add(xData[i], yData[i]);
        }

        // Plotting the regression line across the range of x-values. The points are indexed
        // by an int counter: stepping a double by (maxX - minX) / 100 never terminates when
        // all x values are equal, because the step is then zero.
        double minX = Arrays.stream(xData).min().orElse(0);
        double maxX = Arrays.stream(xData).max().orElse(1);
        if (maxX > minX) {
            double step = (maxX - minX) / REGRESSION_LINE_SEGMENTS;
            for (int k = 0; k <= REGRESSION_LINE_SEGMENTS; k++) {
                double x = minX + k * step;
                regressionLine.add(x, coefficients[0] + coefficients[1] * x);
            }
        } else if (maxX == minX) {
            regressionLine.add(minX, coefficients[0] + coefficients[1] * minX);
        }

        XYSeriesCollection dataset = new XYSeriesCollection();
        dataset.addSeries(series);
        dataset.addSeries(regressionLine);

        JFreeChart chart = ChartFactory.createScatterPlot(
                "Grades vs " + metricName,
                metricName,
                "Grades",
                dataset,
                PlotOrientation.VERTICAL,
                true,
                true,
                false
        );

        // Series 0 (students) is drawn as points only, series 1 (regression) as a line only.
        XYLineAndShapeRenderer renderer = new XYLineAndShapeRenderer();
        renderer.setSeriesLinesVisible(0, false);
        renderer.setSeriesShapesVisible(0, true);
        renderer.setSeriesLinesVisible(1, true);
        renderer.setSeriesShapesVisible(1, false);

        chart.getXYPlot().setRenderer(renderer);

        try {
            // Create a buffered image to hold the chart
            BufferedImage chartImage = chart.createBufferedImage(800, 600);

            // Save the chart as a PNG
            File outputFile = new File(CHART_OUTPUT_DIR + metricName + ".png");
            // ImageIO cannot create missing folders and fails with FileNotFoundException,
            // so create the output directory on demand.
            File outputDir = outputFile.getParentFile();
            if (outputDir != null && !outputDir.isDirectory() && !outputDir.mkdirs()) {
                throw new java.io.IOException("Could not create directory " + outputDir);
            }
            ImageIO.write(chartImage, "png", outputFile);

            System.out.println("Chart saved as " + metricName + ".png");
        } catch (Exception e) {
            System.err.println("Problem occurred creating chart.");
            e.printStackTrace();
        }
    }

    /**
     * Computes a quantile with linear interpolation between the two closest ranks.
     *
     * @param data     the sample; it is not modified
     * @param quantile requested quantile in the range [0, 1]
     * @return the interpolated value at position {@code (n - 1) * quantile} of the sorted data
     * @throws IllegalArgumentException if {@code quantile} is NaN or outside [0, 1], or if
     *                                  {@code data} is empty
     */
    public static double calculateQuantile(double[] data, double quantile) {
        // Written as a negated range check so that NaN is rejected too.
        if (!(quantile >= 0 && quantile <= 1)) {
            throw new IllegalArgumentException("Quantile should be between 0 and 1");
        }
        if (data.length == 0) {
            throw new IllegalArgumentException("Cannot compute a quantile of an empty array");
        }

        double[] sortedData = data.clone();
        Arrays.sort(sortedData);
        int n = sortedData.length;

        double pos = (n - 1) * quantile;
        int lower = (int) Math.floor(pos);
        int upper = (int) Math.ceil(pos);
        if (lower == upper) {
            return sortedData[lower];
        }
        double weight = pos - lower;
        return (1 - weight) * sortedData[lower] + weight * sortedData[upper];
    }
}

MultiVarAnalyticsGradeRegression.main(null);
java.io.FileNotFoundException: src/main/resources/static/images/Commits.png (No such file or directory)
	at java.base/java.io.RandomAccessFile.open0(Native Method)
	at java.base/java.io.RandomAccessFile.open(RandomAccessFile.java:344)
	at java.base/java.io.RandomAccessFile.<init>(RandomAccessFile.java:259)
	at java.base/java.io.RandomAccessFile.<init>(RandomAccessFile.java:213)
	at java.desktop/javax.imageio.stream.FileImageOutputStream.<init>(FileImageOutputStream.java:69)
	at java.desktop/com.sun.imageio.spi.FileImageOutputStreamSpi.createOutputStreamInstance(FileImageOutputStreamSpi.java:55)
	at java.desktop/javax.imageio.ImageIO.createImageOutputStream(ImageIO.java:421)
	at java.desktop/javax.imageio.ImageIO.write(ImageIO.java:1551)
	at REPL.$JShell$49F$MultiVarAnalyticsGradeRegression.displayChart($JShell$49F.java:267)
	at REPL.$JShell$49F$MultiVarAnalyticsGradeRegression.main($JShell$49F.java:89)
	at REPL.$JShell$56.do_it$($JShell$56.java:39)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:568)
	at io.github.spencerpark.ijava.execution.IJavaExecutionControl.lambda$execute$1(IJavaExecutionControl.java:95)
	at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
	at java.base/java.lang.Thread.run(Thread.java:833)
Problem occurred creating chart.
javax.imageio.IIOException: Can't create an ImageOutputStream!
	at java.desktop/javax.imageio.ImageIO.write(ImageIO.java:1553)
	at REPL.$JShell$49F$MultiVarAnalyticsGradeRegression.displayChart($JShell$49F.java:267)
	at REPL.$JShell$49F$MultiVarAnalyticsGradeRegression.main($JShell$49F.java:89)
	at REPL.$JShell$56.do_it$($JShell$56.java:39)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:568)
	at io.github.spencerpark.ijava.execution.IJavaExecutionControl.lambda$execute$1(IJavaExecutionControl.java:95)
	at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
	at java.base/java.lang.Thread.run(Thread.java:833)

performCrossValidation: This method evaluates the regression model’s predictive accuracy on a dataset of student performance metrics using a single 80/20 hold-out split (despite its name, it does not perform k-fold cross-validation). It splits the data into training and testing sets, trains the model on the training set, predicts grades for the testing set, and calculates the Mean Squared Error (MSE) to assess the model’s performance.

main: The main method is the entry point of the program. It creates an instance of MultiVarAnalyticsGradeRegression, calls the performRegression method with a specified number of students, extracts the commits column, the grades, and the coefficients from the result, and passes them to displayChart to produce the "Grades vs Commits" chart. It does not call performCrossValidation.

normalize: This static method normalizes the input data (features) by calculating the z-score for each value, which helps in standardizing the data before applying machine learning algorithms.

RegressionResult: This static nested class serves as a container to hold the results of the regression analysis: the feature matrix (xData), the actual target grades (yData), and the calculated coefficients.

getXData, setXData, getYData, setYData, getCoefficients, setCoefficients: These are getter and setter methods of the RegressionResult class, which allow for accessing and modifying the regression result’s data, target values, and coefficients.

getColumn: This static utility method extracts a specific column from a two-dimensional array, which is useful for operations that require processing data column-wise.

calculateCoefficients: This method calculates the regression coefficients using the least squares method with regularization (to prevent overfitting), which are essential for making predictions with the regression model.

displayChart: This method generates a scatter plot chart with an overlaid regression line, visualizing the relationship between a single predictor and the target variable, and saves it as a PNG file.

calculateQuantile: This static method calculates the specified quantile of a given array of data, which is useful for understanding the distribution of the data and for tasks like outlier detection or data normalization.