Step 1: Simple Data Generation
I began by simulating a basic dataset. My goal was to generate a set of random numbers to represent one aspect of my data - specifically, the number of commits by students. I utilized Java’s Random class, which provides a straightforward way to generate random numbers. The random.nextDouble() method gave me a double between 0.0 and 1.0, which I scaled to the range I needed.
%jars /home/vishnuaa77/vscode/vishnu/lib/jfreechart-1.5.4.jar
%jars /home/vishnuaa77/vscode/vishnu/lib/jcommon-1.0.24.jar
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;
import java.io.File;
/**
 * Helper that turns paired numeric samples into a scatter plot and stores it on disk as a PNG.
 */
public class DataVisualization {
    /** Width of the exported PNG image, in pixels. */
    private static final int CHART_WIDTH = 800;

    /** Height of the exported PNG image, in pixels. */
    private static final int CHART_HEIGHT = 600;

    /**
     * Plots {@code yData} against {@code xData} as a scatter plot and writes the result to
     * {@code filePath} as a PNG image.
     *
     * <p>A failure while writing the image is reported on {@code System.err} and is not rethrown,
     * so a failed export does not abort the caller.
     *
     * @param xData      x coordinate of every point
     * @param yData      y coordinate of every point; must have the same length as {@code xData}
     * @param title      chart title, also used as the key of the plotted series
     * @param xAxisLabel label of the horizontal axis
     * @param yAxisLabel label of the vertical axis
     * @param filePath   path of the PNG file to write
     * @throws IllegalArgumentException if either array is {@code null} or the lengths differ
     */
    public static void generateAndSaveChart(double[] xData, double[] yData, String title, String xAxisLabel, String yAxisLabel, String filePath) {
        if (xData == null || yData == null) {
            throw new IllegalArgumentException("xData and yData must not be null");
        }
        // Point i is (xData[i], yData[i]); unequal lengths would either overrun yData or
        // silently drop points, so reject them up front with a descriptive message.
        if (xData.length != yData.length) {
            throw new IllegalArgumentException(
                    "xData and yData must have the same length, got " + xData.length + " and " + yData.length);
        }

        XYSeries series = new XYSeries(title);
        for (int i = 0; i < xData.length; i++) {
            series.add(xData[i], yData[i]);
        }
        XYSeriesCollection dataset = new XYSeriesCollection(series);

        JFreeChart chart = ChartFactory.createScatterPlot(
                title,
                xAxisLabel,
                yAxisLabel,
                dataset,
                PlotOrientation.VERTICAL,
                true,   // legend
                true,   // tooltips
                false   // URLs
        );

        try {
            ChartUtils.saveChartAsPNG(new File(filePath), chart, CHART_WIDTH, CHART_HEIGHT);
            System.out.println("Chart has been saved as " + filePath);
        } catch (Exception e) {
            // Export stays best-effort, but report which file failed and why.
            System.err.println("Problem occurred creating chart '" + filePath + "': " + e);
        }
    }
}
import java.util.Random;
/**
 * Step 1 generator: produces uniformly distributed mock commit counts for a group of students.
 */
public class MockDataGenerator {
    private static final Random random = new Random();

    /**
     * Generates one mock commit count per student.
     *
     * @param numStudents number of values to generate
     * @return array of length {@code numStudents}; every value lies in the range [30, 200)
     */
    public static double[] generateCommits(int numStudents) {
        double[] commits = new double[numStudents];
        for (int i = 0; i < numStudents; i++) {
            // nextDouble() is in [0, 1), so the result is uniformly distributed in [30, 200).
            commits[i] = 30 + random.nextDouble() * 170; // Uniformly distributed
        }
        return commits;
    }

    // ... other methods will be added here ...

    /**
     * Generates a sample data set and prints a short summary of it.
     *
     * <p>The previous version called {@code MockDataGenerator.main(null)} from here, which
     * recursed without end and terminated with a {@code StackOverflowError}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        double[] commits = generateCommits(250);

        double lowest = Double.POSITIVE_INFINITY;
        double highest = Double.NEGATIVE_INFINITY;
        for (double value : commits) {
            lowest = Math.min(lowest, value);
            highest = Math.max(highest, value);
        }
        System.out.println("Generated " + commits.length + " commit counts (min " + lowest + ", max " + highest + ")");
    }
}
Step 2: Introducing Skewness
Data in the real world isn’t always uniformly distributed. For example, I noticed that most students might have a lower number of commits, with only a few having very high numbers. To simulate this, I skewed the distribution by squaring the random number, which biased the data towards the lower end.
// Step 2: Introducing Skewness
/**
 * Step 2 generator: commit counts biased towards the low end of the range, plus a
 * placeholder grade derived from them.
 */
public class SkewedDataGenerator {
    private static final Random random = new Random();

    /**
     * Builds a skewed data set for 250 students and saves a grade-vs-commits scatter plot.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final int studentCount = 250;
        double[] commitCounts = generateSkewedCommits(studentCount);
        double[] gradeValues = generateGrades(commitCounts);
        DataVisualization.generateAndSaveChart(
                commitCounts,
                gradeValues,
                "Grade vs Skewed Commits",
                "Skewed Commits",
                "Grades",
                "step2_chart.png");
    }

    /**
     * Generates one skewed commit count per student.
     *
     * @param numStudents number of values to generate
     * @return array of whole-number values, each at least 30 and below 200
     */
    public static double[] generateSkewedCommits(int numStudents) {
        double[] result = new double[numStudents];
        int index = 0;
        while (index < numStudents) {
            // Squaring a value in [0, 1) pushes it towards 0, i.e. towards the low end.
            double squared = Math.pow(random.nextDouble(), 2);
            int offset = (int) (squared * 170);
            result[index] = 30 + offset;
            index++;
        }
        return result;
    }

    /**
     * Placeholder grade model: half the commit count, capped at 100.
     *
     * @param commits commit count per student
     * @return grade per student, in the same order as {@code commits}
     */
    public static double[] generateGrades(double[] commits) {
        final int count = commits.length;
        double[] result = new double[count];
        for (int student = 0; student < count; student++) {
            double halved = commits[student] / 2; // Simplistic grade calculation
            result[student] = Math.min(100, halved);
        }
        return result;
    }
}
SkewedDataGenerator.main(null);
Chart has been saved as step2_chart.png
Step 3: Adding More Variables
Next, I considered additional variables that could affect a student’s performance. Besides commits, students might also contribute through pull requests, issues, and by contributing to different repositories. I hypothesized that these activities were somewhat proportional to the number of commits, so I generated them based on the commits data.
/**
 * Step 3 generator: four activity variables per student (commits, pull requests, issues,
 * repositories contributed to) and a placeholder grade derived from them.
 */
public class MultipleVariablesDataGenerator {
    private static final Random random = new Random();

    /**
     * Generates data for 250 students and saves one grade-vs-variable scatter plot per variable.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        double[][] xData = generateMultipleVariables(250);
        double[] grades = generateGradesBasedOnMultipleVariables(xData);
        // Generate and save a chart for each variable
        for (int i = 0; i < xData[0].length; i++) {
            double[] singleVariableData = extractSingleVariable(xData, i);
            String variableName = getVariableName(i);
            DataVisualization.generateAndSaveChart(singleVariableData, grades, "Grade vs " + variableName, variableName, "Grades", "step3_chart_" + variableName + ".png");
        }
    }

    /**
     * Generates one skewed commit count per student.
     *
     * @param numStudents number of values to generate
     * @return array of whole-number values, each at least 30 and below 200
     */
    public static double[] generateSkewedCommits(int numStudents) {
        double[] commits = new double[numStudents];
        for (int i = 0; i < numStudents; i++) {
            commits[i] = 30 + (int) (Math.pow(random.nextDouble(), 2) * 170); // Skewed towards 30
        }
        return commits;
    }

    /**
     * Generates the four activity variables for every student.
     *
     * @param numStudents number of rows to generate
     * @return {@code numStudents x 4} matrix; columns are commits, pull requests, issues and
     *         repositories contributed to, the last three scaled from the commit count
     */
    public static double[][] generateMultipleVariables(int numStudents) {
        double[][] xData = new double[numStudents][4];
        // Generate the commit column once. Calling generateSkewedCommits inside the loop built
        // a full numStudents-sized array per row just to read a single element of it.
        double[] commits = generateSkewedCommits(numStudents);
        for (int i = 0; i < numStudents; i++) {
            xData[i][0] = commits[i]; // Commits
            xData[i][1] = 5 + (int) (xData[i][0] * 0.25 * random.nextDouble()); // Pull Requests
            xData[i][2] = 10 + (int) (xData[i][0] * 0.5 * random.nextDouble()); // Issues
            xData[i][3] = 2 + (int) (xData[i][0] * 0.1 * random.nextDouble()); // Repos Contributed
        }
        return xData;
    }

    /**
     * Placeholder grade model: the mean of the four variables, capped at 100.
     *
     * @param xData one row per student with at least four columns
     * @return grade per student, in row order
     */
    public static double[] generateGradesBasedOnMultipleVariables(double[][] xData) {
        double[] grades = new double[xData.length];
        for (int i = 0; i < xData.length; i++) {
            grades[i] = Math.min(100, (xData[i][0] + xData[i][1] + xData[i][2] + xData[i][3]) / 4); // Simplistic grade calculation
        }
        return grades;
    }

    /**
     * Copies one column of the data matrix into its own array.
     *
     * @param xData         one row per student
     * @param variableIndex index of the column to copy
     * @return the selected column, in row order
     */
    public static double[] extractSingleVariable(double[][] xData, int variableIndex) {
        double[] singleVariableData = new double[xData.length];
        for (int i = 0; i < xData.length; i++) {
            singleVariableData[i] = xData[i][variableIndex];
        }
        return singleVariableData;
    }

    /**
     * Maps a column index to the name used in chart titles and file names.
     *
     * @param index column index
     * @return the variable name, or {@code "Unknown"} for an index outside 0-3
     */
    public static String getVariableName(int index) {
        switch (index) {
            case 0:
                return "Commits";
            case 1:
                return "PullRequests";
            case 2:
                return "Issues";
            case 3:
                return "ReposContributed";
            default:
                return "Unknown";
        }
    }
}
MultipleVariablesDataGenerator.main(null);
Chart has been saved as step3_chart_Commits.png
Chart has been saved as step3_chart_PullRequests.png
Chart has been saved as step3_chart_Issues.png
Chart has been saved as step3_chart_ReposContributed.png
Step 4: Generating and Refining the Grade
In this step, I focused on the dependent variable, which is the grade. It’s calculated based on GitHub activities: commits, pull requests, issues, and repositories contributed to. Each activity has a different weight in the final grade: commits (0.4), pull requests (0.2), issues (0.2), and repositories contributed to (0.2).
To ensure fairness and realism in grading, I capped the maximum grade at 100 and applied a logarithmic scale to moderate the influence of higher activity counts. This approach prevents disproportionately high grades for extreme values.
/**
 * Step 4 generator: activity variables per student and a weighted, logarithmically scaled grade.
 */
public class GradeDataGenerator {
    private static final Random random = new Random();

    /**
     * Generates data for 250 students and saves one grade-vs-variable scatter plot per variable.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        double[][] xData = generateMultipleVariables(250);
        double[] grades = generateGrades(xData);
        // Generate and save charts for each variable
        String[] descriptors = {"Commits", "PullRequests", "Issues", "ReposContributed"};
        for (int i = 0; i < descriptors.length; i++) {
            DataVisualization.generateAndSaveChart(
                    extractSingleVariable(xData, i),
                    grades,
                    "Grade vs " + descriptors[i],
                    descriptors[i],
                    "Grades",
                    "step4_Chart_" + descriptors[i] + ".png"
            );
        }
    }

    /**
     * Generates one skewed commit count per student.
     *
     * @param numStudents number of values to generate
     * @return array of whole-number values, each at least 30 and below 200
     */
    public static double[] generateSkewedCommits(int numStudents) {
        double[] commits = new double[numStudents];
        for (int i = 0; i < numStudents; i++) {
            commits[i] = 30 + (int) (Math.pow(random.nextDouble(), 2) * 170); // Skewed towards 30
        }
        return commits;
    }

    /**
     * Generates the four activity variables for every student.
     *
     * @param numStudents number of rows to generate
     * @return {@code numStudents x 4} matrix; columns are commits, pull requests, issues and
     *         repositories contributed to, the last three scaled from the commit count
     */
    public static double[][] generateMultipleVariables(int numStudents) {
        double[][] xData = new double[numStudents][4];
        // Generate the commit column once. Calling generateSkewedCommits inside the loop built
        // a full numStudents-sized array per row just to read a single element of it.
        double[] commits = generateSkewedCommits(numStudents);
        for (int i = 0; i < numStudents; i++) {
            xData[i][0] = commits[i]; // Commits
            xData[i][1] = 5 + (int) (xData[i][0] * 0.25 * random.nextDouble()); // Pull Requests
            xData[i][2] = 10 + (int) (xData[i][0] * 0.5 * random.nextDouble()); // Issues
            xData[i][3] = 2 + (int) (xData[i][0] * 0.1 * random.nextDouble()); // Repos Contributed
        }
        return xData;
    }

    /**
     * Computes the grade of every student from the four activity variables.
     *
     * @param xData one row per student with at least four columns (commits, pull requests,
     *              issues, repositories contributed to); values are truncated to {@code int}
     * @return grade per student, in row order
     */
    public static double[] generateGrades(double[][] xData) {
        double[] grades = new double[xData.length];
        for (int i = 0; i < xData.length; i++) {
            grades[i] = calculateGrade(
                    (int) xData[i][0], // Commits
                    (int) xData[i][1], // Pull Requests
                    (int) xData[i][2], // Issues
                    (int) xData[i][3]  // Repos Contributed
            );
        }
        return grades;
    }

    /**
     * Combines the per-activity scores into one weighted grade.
     */
    private static double calculateGrade(int commits, int pullRequests, int issues, int reposContributed) {
        double commitGrade = scoreActivity(commits);
        double pullRequestGrade = scoreActivity(pullRequests);
        double issueGrade = scoreActivity(issues);
        double repoGrade = scoreActivity(reposContributed);
        // Weights: Commits 0.4, Pull Requests 0.2, Issues 0.2, Repos Contributed 0.2
        return 0.4 * commitGrade + 0.2 * pullRequestGrade + 0.2 * issueGrade + 0.2 * repoGrade;
    }

    /**
     * Scores a single activity count on a logarithmic scale, capped at 100.
     *
     * @param count number of recorded activities
     * @return 0 when {@code count} is not positive, otherwise
     *         {@code min(100, 70 + 30 * (1 - 1 / ln(count + 1)))}
     */
    private static double scoreActivity(int count) {
        // ln(count + 1) is 0 for count == 0 (division by zero, score of -Infinity) and is
        // undefined or infinite for negative counts, so treat "no activity" as a score of 0.
        if (count <= 0) {
            return 0;
        }
        return Math.min(100, 70 + (30 * (1 - 1 / Math.log(count + 1))));
    }

    // Helper method to extract a single variable from the 2D array
    private static double[] extractSingleVariable(double[][] xData, int variableIndex) {
        double[] variableData = new double[xData.length];
        for (int i = 0; i < xData.length; i++) {
            variableData[i] = xData[i][variableIndex];
        }
        return variableData;
    }
}
GradeDataGenerator.main(null);
Chart has been saved as step4_Chart_Commits.png
Chart has been saved as step4_Chart_PullRequests.png
Chart has been saved as step4_Chart_Issues.png
Chart has been saved as step4_Chart_ReposContributed.png
Step 5: Minor Fixes and Putting It All Together
For the calculateGrade function, I first apply a linear function and then a logarithmic function, in the hope of preventing grades from rising too quickly at first. I then combined all these steps into the MockDataGenerator class, providing a comprehensive set of data that reflects the various factors influencing student grades.
/**
 * Final generator: produces the four activity variables per student and a grade computed
 * from a piecewise (linear, then logarithmic) score per activity.
 */
public class MockDataGenerator {
    private static final Random random = new Random();

    /**
     * Generates data for 250 students and saves one grade-vs-variable scatter plot per variable.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        double[][] xData = generateXData(250);
        double[] yData = generateYData(xData);
        // Generate and save charts for each variable
        for (int i = 0; i < xData[0].length; i++) {
            String variableName = getVariableName(i);
            DataVisualization.generateAndSaveChart(
                    extractSingleVariable(xData, i),
                    yData,
                    "Step4 Chart " + variableName,
                    variableName,
                    "Grades",
                    // Locale.ROOT keeps the file name independent of the default locale; a plain
                    // toLowerCase() maps 'I' to a dotless i under e.g. a Turkish locale.
                    "step5_chart_" + variableName.toLowerCase(java.util.Locale.ROOT) + ".png"
            );
        }
    }

    /**
     * Generates the independent variables for every student.
     *
     * @param numStudents number of rows to generate
     * @return {@code numStudents x 4} matrix; columns are commits, pull requests, issues and
     *         repositories contributed to, the last three scaled from the commit count
     */
    public static double[][] generateXData(int numStudents) {
        double[][] xData = new double[numStudents][4];
        for (int i = 0; i < numStudents; i++) {
            int commits = 30 + (int) (Math.pow(random.nextDouble(), 2) * 170); // Skewed towards 30
            int pullRequests = 5 + (int) (commits * 0.25 * random.nextDouble()); // Based on commits
            int issues = 10 + (int) (commits * 0.5 * random.nextDouble()); // Based on commits
            int reposContributed = 2 + (int) (commits * 0.1 * random.nextDouble()); // Based on commits
            xData[i][0] = commits;
            xData[i][1] = pullRequests;
            xData[i][2] = issues;
            xData[i][3] = reposContributed;
        }
        return xData;
    }

    /**
     * Computes the grade of every student from the independent variables.
     *
     * @param xData one row per student with at least four columns (commits, pull requests,
     *              issues, repositories contributed to); values are truncated to {@code int}
     * @return grade per student, in row order
     */
    public static double[] generateYData(double[][] xData) {
        double[] yData = new double[xData.length];
        for (int i = 0; i < xData.length; i++) {
            yData[i] = calculateGrade((int) xData[i][0], (int) xData[i][1], (int) xData[i][2], (int) xData[i][3]);
        }
        return yData;
    }

    /**
     * Scores each activity with a linear ramp up to a per-activity threshold and a logarithmic
     * curve (capped at 100) above it, then combines the scores with weights 0.4 (commits) and
     * 0.2 each (pull requests, issues, repositories contributed to).
     */
    private static double calculateGrade(int commits, int pullRequests, int issues, int reposContributed) {
        // NOTE(review): each linear branch ends below 90 (e.g. 67.5 at 40 commits) while the
        // logarithmic branch starts above 90, so the score jumps at the threshold; confirm
        // this is intended.
        double commitGrade = (commits <= 40) ? 60 + (commits - 30) * 0.75 : Math.min(100, 90 + (10 * (1 - 1 / Math.log(commits - 29))));
        double pullRequestGrade = (pullRequests <= 10) ? 60 + (pullRequests - 5) * 2.25 : Math.min(100, 90 + (10 * (1 - 1 / Math.log(pullRequests - 4))));
        double issueGrade = (issues <= 30) ? 60 + (issues - 10) * 0.96 : Math.min(100, 90 + (10 * (1 - 1 / Math.log(issues - 9))));
        double repoGrade = (reposContributed <= 7) ? 60 + (reposContributed - 2) * 3.5 : Math.min(100, 90 + (10 * (1 - 1 / Math.log(reposContributed - 1))));
        return 0.4 * commitGrade + 0.2 * pullRequestGrade + 0.2 * issueGrade + 0.2 * repoGrade;
    }

    // Helper method to extract a single variable from the 2D array
    private static double[] extractSingleVariable(double[][] xData, int variableIndex) {
        double[] variableData = new double[xData.length];
        for (int i = 0; i < xData.length; i++) {
            variableData[i] = xData[i][variableIndex];
        }
        return variableData;
    }

    // Helper method to get the name of the variable by index
    private static String getVariableName(int index) {
        switch (index) {
            case 0: return "Commits";
            case 1: return "PullRequests";
            case 2: return "Issues";
            case 3: return "ReposContributed";
            default: return "Unknown";
        }
    }
}
MockDataGenerator.main(null);
Chart has been saved as step5_chart_commits.png
Chart has been saved as step5_chart_pullrequests.png
Chart has been saved as step5_chart_issues.png
Chart has been saved as step5_chart_reposcontributed.png