Step 1: Simple Data Generation

I began by simulating a basic dataset. My goal was to generate a set of random numbers to represent one aspect of my data - specifically, the number of commits by students. I utilized Java’s Random class, which provides a straightforward way to generate random numbers. The random.nextDouble() method gave me a double between 0.0 and 1.0, which I scaled to the range I needed.

%jars /home/vishnuaa77/vscode/vishnu/lib/jfreechart-1.5.4.jar
%jars /home/vishnuaa77/vscode/vishnu/lib/jcommon-1.0.24.jar

import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;

import java.io.File;

/**
 * Helper for rendering paired x/y samples as a JFreeChart scatter plot and
 * writing the result to a PNG file.
 */
public class DataVisualization {

    /**
     * Builds a scatter plot from two parallel arrays and saves it as an 800x600 PNG.
     *
     * <p>Failures while writing the image are reported on {@code System.err}
     * (with the target path and the underlying exception) instead of being
     * propagated, so a failed chart does not abort the calling cell.
     *
     * @param xData      x coordinate of each point
     * @param yData      y coordinate of each point; must be the same length as {@code xData}
     * @param title      chart title, also used as the name of the plotted series
     * @param xAxisLabel label for the horizontal axis
     * @param yAxisLabel label for the vertical axis
     * @param filePath   path of the PNG file to write
     * @throws IllegalArgumentException if {@code xData} and {@code yData} differ in length
     */
    public static void generateAndSaveChart(double[] xData, double[] yData, String title, String xAxisLabel, String yAxisLabel, String filePath) {
        // Fail fast on mismatched inputs: otherwise a shorter yData surfaces as a bare
        // ArrayIndexOutOfBoundsException and a longer one is silently truncated.
        if (xData.length != yData.length) {
            throw new IllegalArgumentException(
                    "xData and yData must have the same length, got "
                            + xData.length + " and " + yData.length);
        }

        XYSeries series = new XYSeries(title);
        for (int i = 0; i < xData.length; i++) {
            series.add(xData[i], yData[i]);
        }
        XYSeriesCollection dataset = new XYSeriesCollection(series);
        JFreeChart chart = ChartFactory.createScatterPlot(
                title,
                xAxisLabel,
                yAxisLabel,
                dataset,
                PlotOrientation.VERTICAL,
                true,   // legend
                true,   // tooltips
                false   // urls
        );

        try {
            ChartUtils.saveChartAsPNG(new File(filePath), chart, 800, 600);
            System.out.println("Chart has been saved as " + filePath);
        } catch (Exception e) {
            // Best-effort save: keep going, but say which file failed and why.
            System.err.println("Problem occurred creating chart " + filePath + ": " + e);
        }
    }
}
import java.util.Random;

/**
 * Step 1 generator: produces uniformly distributed mock commit counts.
 */
public class MockDataGenerator {
    private static final Random random = new Random();

    /**
     * Generates one commit value per student, drawn uniformly from [30, 200).
     *
     * @param numStudents number of values to generate
     * @return array of length {@code numStudents}; values are fractional doubles
     */
    public static double[] generateCommits(int numStudents) {
        double[] commits = new double[numStudents];
        for (int i = 0; i < numStudents; i++) {
            // nextDouble() is in [0, 1), so the result lies in [30, 200).
            commits[i] = 30 + random.nextDouble() * 170; // Uniformly distributed
        }
        return commits;
    }
    // ... other methods will be added here ...

    /**
     * Generates commits for 250 students and prints a short summary.
     *
     * <p>The previous body called {@code MockDataGenerator.main(null)}, i.e. itself,
     * which recursed until a {@code StackOverflowError}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        double[] commits = generateCommits(250);

        double min = Double.POSITIVE_INFINITY;
        double max = Double.NEGATIVE_INFINITY;
        double sum = 0;
        for (double value : commits) {
            min = Math.min(min, value);
            max = Math.max(max, value);
            sum += value;
        }

        System.out.println("Generated " + commits.length + " commit values"
                + " (min=" + min + ", max=" + max + ", mean=" + (sum / commits.length) + ")");
    }
}

Step 2: Introducing Skewness

Data in the real world isn’t always uniformly distributed. For example, I noticed that most students might have a lower number of commits, with only a few having very high numbers. To simulate this, I skewed the distribution by squaring the random number, which biased the data towards the lower end.

// Step 2: Introducing Skewness
/**
 * Step 2 generator: commit counts biased towards the low end of the range,
 * paired with a placeholder grade derived from the commits alone.
 */
public class SkewedDataGenerator {
    private static final Random random = new Random();

    /**
     * Generates 250 skewed commit counts, derives grades from them and saves
     * the scatter plot to {@code step2_chart.png}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        double[] commitCounts = generateSkewedCommits(250);
        DataVisualization.generateAndSaveChart(
                commitCounts,
                generateGrades(commitCounts),
                "Grade vs Skewed Commits",
                "Skewed Commits",
                "Grades",
                "step2_chart.png");
    }

    /**
     * Generates one commit count per student, skewed towards lower numbers.
     *
     * @param numStudents number of values to generate
     * @return array of length {@code numStudents} holding whole-number values starting at 30
     */
    public static double[] generateSkewedCommits(int numStudents) {
        double[] result = new double[numStudents];
        int slot = 0;
        while (slot < numStudents) {
            // Squaring a value in [0, 1) pulls it towards 0, so most counts land near 30.
            double squared = Math.pow(random.nextDouble(), 2);
            int offset = (int) (squared * 170);
            result[slot] = 30 + offset;
            slot++;
        }
        return result;
    }

    /**
     * Placeholder grade model: half the commit count, capped at 100.
     *
     * @param commits commit count per student
     * @return grade per student, in the same order as {@code commits}
     */
    public static double[] generateGrades(double[] commits) {
        double[] grades = new double[commits.length];
        int slot = 0;
        for (double commitCount : commits) {
            double halved = commitCount / 2;
            grades[slot++] = Math.min(100, halved);
        }
        return grades;
    }
}

SkewedDataGenerator.main(null);
Chart has been saved as step2_chart.png

Step 2: Grade vs Skewed Commits

Step 3: Adding More Variables

Next, I considered additional variables that could affect a student’s performance. Besides commits, students might also contribute through pull requests, issues, and by contributing to different repositories. I hypothesized that these activities were somewhat proportional to the number of commits, so I generated them based on the commits data.

/**
 * Step 3 generator: adds pull requests, issues and repositories contributed to,
 * each derived from the (skewed) commit count, plus a placeholder grade.
 */
public class MultipleVariablesDataGenerator {
    private static final Random random = new Random();

    /** Columns per student row: commits, pull requests, issues, repos contributed. */
    private static final int NUM_VARIABLES = 4;

    /**
     * Generates data for 250 students and saves one "grade vs variable" chart
     * per activity column as {@code step3_chart_<variable>.png}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        double[][] xData = generateMultipleVariables(250);
        double[] grades = generateGradesBasedOnMultipleVariables(xData);
        // Generate and save a chart for each variable
        for (int i = 0; i < NUM_VARIABLES; i++) {
            double[] singleVariableData = extractSingleVariable(xData, i);
            String variableName = getVariableName(i);
            DataVisualization.generateAndSaveChart(singleVariableData, grades, "Grade vs " + variableName, variableName, "Grades", "step3_chart_" + variableName + ".png");
        }
    }

    /**
     * Generates one commit count per student, skewed towards 30.
     *
     * @param numStudents number of values to generate
     * @return array of length {@code numStudents} holding whole-number values starting at 30
     */
    public static double[] generateSkewedCommits(int numStudents) {
        double[] commits = new double[numStudents];
        for (int i = 0; i < numStudents; i++) {
            // Squaring a value in [0, 1) biases it towards 0.
            commits[i] = 30 + (int) (Math.pow(random.nextDouble(), 2) * 170); // Skewed towards 30
        }
        return commits;
    }

    /**
     * Generates the four activity variables for every student.
     *
     * @param numStudents number of rows to generate
     * @return {@code numStudents x 4} matrix; columns are commits, pull requests,
     *         issues and repos contributed, in that order
     */
    public static double[][] generateMultipleVariables(int numStudents) {
        double[][] xData = new double[numStudents][NUM_VARIABLES];
        // Generate the commit column once. Calling generateSkewedCommits inside the
        // loop built a full n-element array per row just to read one entry (O(n^2)).
        double[] commits = generateSkewedCommits(numStudents);
        for (int i = 0; i < numStudents; i++) {
            xData[i][0] = commits[i]; // Commits
            // Each derived variable is a base value plus a random share of the commits.
            xData[i][1] = 5 + (int) (xData[i][0] * 0.25 * random.nextDouble()); // Pull Requests
            xData[i][2] = 10 + (int) (xData[i][0] * 0.5 * random.nextDouble()); // Issues
            xData[i][3] = 2 + (int) (xData[i][0] * 0.1 * random.nextDouble()); // Repos Contributed
        }
        return xData;
    }

    /**
     * Placeholder grade model: the mean of the four activity values, capped at 100.
     *
     * @param xData one row per student with at least four columns
     * @return grade per student, in row order
     */
    public static double[] generateGradesBasedOnMultipleVariables(double[][] xData) {
        double[] grades = new double[xData.length];
        for (int i = 0; i < xData.length; i++) {
            grades[i] = Math.min(100, (xData[i][0] + xData[i][1] + xData[i][2] + xData[i][3]) / 4); // Simplistic grade calculation
        }
        return grades;
    }

    /**
     * Copies one column out of the row-major data matrix.
     *
     * @param xData         one row per student
     * @param variableIndex column to extract
     * @return the selected column, one entry per row
     */
    public static double[] extractSingleVariable(double[][] xData, int variableIndex) {
        double[] singleVariableData = new double[xData.length];
        for (int i = 0; i < xData.length; i++) {
            singleVariableData[i] = xData[i][variableIndex];
        }
        return singleVariableData;
    }

    /**
     * Maps a column index to the name used in chart labels and file names.
     *
     * @param index column index
     * @return the variable name, or {@code "Unknown"} for an index outside 0-3
     */
    public static String getVariableName(int index) {
        switch (index) {
            case 0:
                return "Commits";
            case 1:
                return "PullRequests";
            case 2:
                return "Issues";
            case 3:
                return "ReposContributed";
            default:
                return "Unknown";
        }
    }
}

MultipleVariablesDataGenerator.main(null);
Chart has been saved as step3_chart_Commits.png
Chart has been saved as step3_chart_PullRequests.png
Chart has been saved as step3_chart_Issues.png
Chart has been saved as step3_chart_ReposContributed.png

Grade vs Commits Grade vs PullRequests Grade vs Issues Grade vs ReposContributed

Step 4: Generating and Refining the Grade

In this step, I focused on the dependent variable, which is the grade. It’s calculated based on GitHub activities: commits, pull requests, issues, and repositories contributed to. Each activity has a different weight in the final grade: commits (0.4), pull requests (0.2), issues (0.2), and repositories contributed to (0.2).

To ensure fairness and realism in grading, I capped the maximum grade at 100 and applied a logarithmic scale to moderate the influence of higher activity counts. This approach prevents disproportionately high grades for extreme values.

/**
 * Step 4 generator: same activity variables as Step 3, with a weighted,
 * log-scaled grade capped at 100.
 */
public class GradeDataGenerator {
    private static final Random random = new Random();

    /**
     * Generates data for 250 students and saves one "grade vs variable" chart
     * per activity column as {@code step4_Chart_<variable>.png}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        double[][] xData = generateMultipleVariables(250);
        double[] grades = generateGrades(xData);

        // Generate and save charts for each variable
        String[] descriptors = {"Commits", "PullRequests", "Issues", "ReposContributed"};
        for (int i = 0; i < descriptors.length; i++) {
            DataVisualization.generateAndSaveChart(
                extractSingleVariable(xData, i),
                grades,
                "Grade vs " + descriptors[i],
                descriptors[i],
                "Grades",
                "step4_Chart_" + descriptors[i] + ".png"
            );
        }
    }

    /**
     * Generates one commit count per student, skewed towards 30.
     *
     * @param numStudents number of values to generate
     * @return array of length {@code numStudents} holding whole-number values starting at 30
     */
    public static double[] generateSkewedCommits(int numStudents) {
        double[] commits = new double[numStudents];
        for (int i = 0; i < numStudents; i++) {
            // Squaring a value in [0, 1) biases it towards 0.
            commits[i] = 30 + (int) (Math.pow(random.nextDouble(), 2) * 170); // Skewed towards 30
        }
        return commits;
    }

    /**
     * Generates the four activity variables for every student.
     *
     * @param numStudents number of rows to generate
     * @return {@code numStudents x 4} matrix; columns are commits, pull requests,
     *         issues and repos contributed, in that order
     */
    public static double[][] generateMultipleVariables(int numStudents) {
        double[][] xData = new double[numStudents][4];
        // Generate the commit column once. Calling generateSkewedCommits inside the
        // loop built a full n-element array per row just to read one entry (O(n^2)).
        double[] commits = generateSkewedCommits(numStudents);
        for (int i = 0; i < numStudents; i++) {
            xData[i][0] = commits[i]; // Commits
            xData[i][1] = 5 + (int) (xData[i][0] * 0.25 * random.nextDouble()); // Pull Requests
            xData[i][2] = 10 + (int) (xData[i][0] * 0.5 * random.nextDouble()); // Issues
            xData[i][3] = 2 + (int) (xData[i][0] * 0.1 * random.nextDouble()); // Repos Contributed
        }
        return xData;
    }

    /**
     * Computes the weighted grade for every student row.
     *
     * @param xData one row per student with at least four columns
     *              (commits, pull requests, issues, repos contributed)
     * @return grade per student, in row order
     */
    public static double[] generateGrades(double[][] xData) {
        double[] grades = new double[xData.length];
        for (int i = 0; i < xData.length; i++) {
            grades[i] = calculateGrade(
                (int) xData[i][0], // Commits
                (int) xData[i][1], // Pull Requests
                (int) xData[i][2], // Issues
                (int) xData[i][3]  // Repos Contributed
            );
        }
        return grades;
    }

    /** Combines the four per-activity grades using the weights 0.4 / 0.2 / 0.2 / 0.2. */
    private static double calculateGrade(int commits, int pullRequests, int issues, int reposContributed) {
        double commitGrade = activityGrade(commits);
        double pullRequestGrade = activityGrade(pullRequests);
        double issueGrade = activityGrade(issues);
        double repoGrade = activityGrade(reposContributed);

        // Weights: Commits 0.4, Pull Requests 0.2, Issues 0.2, Repos Contributed 0.2
        return 0.4 * commitGrade + 0.2 * pullRequestGrade + 0.2 * issueGrade + 0.2 * repoGrade;
    }

    /**
     * Log-scaled grade for a single activity count, capped at 100.
     *
     * <p>For positive counts this is {@code 70 + 30 * (1 - 1 / ln(count + 1))}.
     * A count of 0 would divide by {@code ln(1) == 0} and yield {@code -Infinity},
     * and negative counts have no meaningful logarithm here, so both map to 0.
     */
    private static double activityGrade(int count) {
        if (count <= 0) {
            return 0;
        }
        return Math.min(100, 70 + (30 * (1 - 1 / Math.log(count + 1))));
    }

    // Helper method to extract a single variable (column) from the 2D array
    private static double[] extractSingleVariable(double[][] xData, int variableIndex) {
        double[] variableData = new double[xData.length];
        for (int i = 0; i < xData.length; i++) {
            variableData[i] = xData[i][variableIndex];
        }
        return variableData;
    }
}

GradeDataGenerator.main(null);
Chart has been saved as step4_Chart_Commits.png
Chart has been saved as step4_Chart_PullRequests.png
Chart has been saved as step4_Chart_Issues.png
Chart has been saved as step4_Chart_ReposContributed.png

Grade vs Commits Grade vs PullRequests Grade vs Issues Grade vs ReposContributed

Step 5: Minor Fixes and Putting It All Together

For the calculateGrade function, I first apply a linear function to low activity counts and then switch to a logarithmic function, in the hope of preventing grades from rising too quickly at first. I then combined all these steps into the MockDataGenerator class, providing a comprehensive set of data that reflected the various factors influencing student grades.

/**
 * Final generator: produces the four GitHub activity variables per student and a
 * weighted grade that is linear for low activity and logarithmic above a threshold.
 */
public class MockDataGenerator {

    private static final Random random = new Random();

    /**
     * Generates data for 250 students and saves one "grade vs variable" chart
     * per activity column as {@code step5_chart_<variable>.png}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        double[][] xData = generateXData(250);
        double[] yData = generateYData(xData);

        // Generate and save charts for each variable
        for (int i = 0; i < xData[0].length; i++) {
            String variableName = getVariableName(i);
            DataVisualization.generateAndSaveChart(
                extractSingleVariable(xData, i),
                yData,
                // Title follows the "Grade vs <variable>" form of the earlier steps;
                // it previously read "Step4 Chart ..." although the files are step5_*.
                "Grade vs " + variableName,
                variableName,
                "Grades",
                // Locale.ROOT keeps file names stable regardless of the default locale
                // (e.g. a Turkish locale lower-cases "I" to a dotless "ı").
                "step5_chart_" + variableName.toLowerCase(java.util.Locale.ROOT) + ".png"
            );
        }
    }

    /**
     * Generates the four activity variables for every student.
     *
     * @param numStudents number of rows to generate
     * @return {@code numStudents x 4} matrix; columns are commits, pull requests,
     *         issues and repos contributed, in that order
     */
    public static double[][] generateXData(int numStudents) {
        double[][] xData = new double[numStudents][4];

        for (int i = 0; i < numStudents; i++) {
            // Squaring a value in [0, 1) biases the commit count towards its minimum.
            int commits = 30 + (int) (Math.pow(random.nextDouble(), 2) * 170); // Skewed towards 30
            int pullRequests = 5 + (int) (commits * 0.25 * random.nextDouble()); // Based on commits
            int issues = 10 + (int) (commits * 0.5 * random.nextDouble()); // Based on commits
            int reposContributed = 2 + (int) (commits * 0.1 * random.nextDouble()); // Based on commits

            xData[i][0] = commits;
            xData[i][1] = pullRequests;
            xData[i][2] = issues;
            xData[i][3] = reposContributed;
        }

        return xData;
    }

    /**
     * Computes the weighted grade for every student row.
     *
     * @param xData one row per student with at least four columns
     *              (commits, pull requests, issues, repos contributed)
     * @return grade per student, in row order
     */
    public static double[] generateYData(double[][] xData) {
        double[] yData = new double[xData.length];

        for (int i = 0; i < xData.length; i++) {
            yData[i] = calculateGrade((int)xData[i][0], (int)xData[i][1], (int)xData[i][2], (int)xData[i][3]);
        }

        return yData;
    }

    /** Combines the four per-activity grades using the weights 0.4 / 0.2 / 0.2 / 0.2. */
    private static double calculateGrade(int commits, int pullRequests, int issues, int reposContributed) {
        double commitGrade = piecewiseGrade(commits, 30, 40, 0.75);
        double pullRequestGrade = piecewiseGrade(pullRequests, 5, 10, 2.25);
        double issueGrade = piecewiseGrade(issues, 10, 30, 0.96);
        double repoGrade = piecewiseGrade(reposContributed, 2, 7, 3.5);

        return 0.4 * commitGrade + 0.2 * pullRequestGrade + 0.2 * issueGrade + 0.2 * repoGrade;
    }

    /**
     * Grade for one activity: linear up to {@code linearLimit}, logarithmic above it.
     *
     * <p>NOTE(review): the two branches do not meet at the threshold. For commits,
     * 40 yields 67.5 on the linear branch while 41 yields roughly 96 on the
     * logarithmic branch. Confirm whether this jump is intended.
     *
     * @param count       observed activity count
     * @param base        minimum count the generator produces for this activity
     * @param linearLimit highest count still graded linearly
     * @param slope       points added per unit above {@code base} on the linear branch
     * @return {@code 60 + (count - base) * slope} when {@code count <= linearLimit},
     *         otherwise {@code 90 + 10 * (1 - 1 / ln(count - base + 1))} capped at 100
     */
    private static double piecewiseGrade(int count, int base, int linearLimit, double slope) {
        if (count <= linearLimit) {
            return 60 + (count - base) * slope;
        }
        return Math.min(100, 90 + (10 * (1 - 1 / Math.log(count - (base - 1)))));
    }

    // Helper method to extract a single variable (column) from the 2D array
    private static double[] extractSingleVariable(double[][] xData, int variableIndex) {
        double[] variableData = new double[xData.length];
        for (int i = 0; i < xData.length; i++) {
            variableData[i] = xData[i][variableIndex];
        }
        return variableData;
    }

    // Helper method to get the name of the variable by index ("Unknown" outside 0-3)
    private static String getVariableName(int index) {
        switch (index) {
            case 0: return "Commits";
            case 1: return "PullRequests";
            case 2: return "Issues";
            case 3: return "ReposContributed";
            default: return "Unknown";
        }
    }
}

MockDataGenerator.main(null);
Chart has been saved as step5_chart_commits.png
Chart has been saved as step5_chart_pullrequests.png
Chart has been saved as step5_chart_issues.png
Chart has been saved as step5_chart_reposcontributed.png

Grade vs Commits Grade vs PullRequests Grade vs Issues Grade vs ReposContributed