Moved to _dev

This commit is contained in:
2025-09-20 16:11:47 +02:00
parent fb1a8753b7
commit b2ba11fcd3
1670 changed files with 224899 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
import { cx } from "lib/cx";
// cx should join plain class names and silently drop falsy conditional entries.
test("cx", () => {
  const bothPlain = cx("px-1", "mt-2");
  const conditionOn = cx("px-1", true && "mt-2");
  const conditionOff = cx("px-1", false && "mt-2");

  expect(bothPlain).toEqual("px-1 mt-2");
  expect(conditionOn).toEqual("px-1 mt-2");
  expect(conditionOff).toEqual("px-1");
});

View File

@@ -0,0 +1,21 @@
import { makeObjectCharIterator } from "lib/make-object-char-iterator";
// A flat object is typed out one character at a time, then the iterator is exhausted.
test("Simple object", () => {
  const iterator = makeObjectCharIterator({ a: "" }, { a: "abc" });
  const expectedSteps = [{ a: "a" }, { a: "ab" }, { a: "abc" }, undefined];
  for (const expected of expectedSteps) {
    expect(iterator.next().value).toEqual(expected);
  }
});
// Nested strings are typed out too; every step yields the whole (outer) object.
test("Nested object", () => {
  const iterator = makeObjectCharIterator({ a: { b: "" } }, { a: { b: "abc" } });
  const expectedSteps = [
    { a: { b: "a" } },
    { a: { b: "ab" } },
    { a: { b: "abc" } },
    undefined,
  ];
  for (const expected of expectedSteps) {
    expect(iterator.next().value).toEqual(expected);
  }
});

View File

@@ -0,0 +1,16 @@
// Conversion factor applied to the *_PT sizes below to obtain the *_PX sizes.
export const PX_PER_PT = 4 / 3;
// Reference: https://www.prepressure.com/library/paper-size/letter
// Letter size is commonly used in US & Canada, while A4 is the standard for the rest of the world.
export const LETTER_WIDTH_PT = 612;
const LETTER_HEIGHT_PT = 792;
// Letter dimensions converted from points to pixels.
export const LETTER_WIDTH_PX = LETTER_WIDTH_PT * PX_PER_PT;
export const LETTER_HEIGHT_PX = LETTER_HEIGHT_PT * PX_PER_PT;
// Reference: https://www.prepressure.com/library/paper-size/din-a4
export const A4_WIDTH_PT = 595;
const A4_HEIGHT_PT = 842;
// A4 dimensions converted from points to pixels.
export const A4_WIDTH_PX = A4_WIDTH_PT * PX_PER_PT;
export const A4_HEIGHT_PX = A4_HEIGHT_PT * PX_PER_PT;
// The type only admits `true` or `undefined`, so the flag is switched off with `undefined` rather than `false`.
// NOTE(review): presumably toggles debug output for the resume PDF - confirm at the usage site.
export const DEBUG_RESUME_PDF_FLAG: true | undefined = undefined; // use undefined to disable to deal with a weird error message

View File

@@ -0,0 +1,18 @@
/**
 * cx is a simple util to join classNames together. Think of it as a simplified version of the open source classnames util
 * Reference: https://dev.to/gugaguichard/replace-clsx-classnames-or-classcat-with-your-own-little-helper-3bf
 *
 * Only string arguments contribute to the result; booleans and undefined
 * (typically produced by `condition && "class"`) are ignored. Each string is
 * trimmed, and strings that are empty after trimming are skipped so the
 * result never contains doubled, leading or trailing spaces.
 *
 * @param classes Class name candidates.
 * @returns The kept class names joined by a single space ("" if none).
 *
 * @example
 * cx('px-1', 'mt-2'); // => 'px-1 mt-2'
 * cx('px-1', true && 'mt-2'); // => 'px-1 mt-2'
 * cx('px-1', false && 'mt-2'); // => 'px-1'
 * cx('px-1', '', 'mt-2'); // => 'px-1 mt-2'
 */
export const cx = (...classes: Array<string | boolean | undefined>) => {
  const newClasses: string[] = [];
  for (const c of classes) {
    if (typeof c !== "string") {
      continue;
    }
    const trimmed = c.trim();
    // Skip blank entries: joining "" would produce stray spaces in the className
    if (trimmed) {
      newClasses.push(trimmed);
    }
  }
  return newClasses.join(" ");
};

View File

@@ -0,0 +1,8 @@
/**
 * Deep clones an object by serializing it to JSON and parsing it back.
 *
 * Intended for server side code (client side can simply use structuredClone).
 * Not efficient for large objects, and anything JSON can't represent
 * (e.g. undefined properties) does not survive the round trip.
 */
export const deepClone = <T extends { [key: string]: any }>(object: T) => {
  const serialized = JSON.stringify(object);
  return JSON.parse(serialized) as T;
};

View File

@@ -0,0 +1,28 @@
type Object = { [key: string]: any };

/**
 * Type guard for "plain container" values: non-null objects that are not arrays.
 */
const isObject = (item: any): item is Object => {
  return Boolean(item) && typeof item === "object" && !Array.isArray(item);
};

/**
 * Deep merge two objects by overriding target with fields in source.
 * It returns a new object and doesn't modify the target in place since
 * it deep clones the target object first (only at the top level call).
 *
 * Non-object source values (primitives and arrays) replace the target value
 * as-is; object source values are merged key by key.
 *
 * A `__proto__` key in source is ignored: reading `copyTarget["__proto__"]`
 * yields the shared Object.prototype, so merging into it would pollute every
 * object in the process.
 *
 * @param target Object providing the base values.
 * @param source Object whose fields override the target's.
 * @param level Recursion depth; callers should leave it at the default 0.
 * @returns The merged object (a structured clone of target with source applied).
 */
export const deepMerge = (target: Object, source: Object, level = 0) => {
  const copyTarget = level === 0 ? structuredClone(target) : target;
  for (const key in source) {
    // Guard against prototype pollution, e.g. JSON.parse('{"__proto__": {...}}')
    if (key === "__proto__") {
      continue;
    }
    const sourceValue = source[key];
    // Assign source value to copyTarget if source value is not an object.
    // Otherwise, call deepMerge recursively to merge all its keys
    if (!isObject(sourceValue)) {
      copyTarget[key] = sourceValue;
    } else {
      if (!isObject(copyTarget[key])) {
        copyTarget[key] = {};
      }
      deepMerge(copyTarget[key], sourceValue, level + 1);
    }
  }
  return copyTarget;
};

View File

@@ -0,0 +1,6 @@
/**
 * Reads the computed font size of the <body> element as a number.
 * Falls back to 16 when the value can't be parsed (or parses to 0).
 */
export const getPxPerRem = () => {
  const body = document.querySelector("body")!;
  const computedStyle = getComputedStyle(body) as any;
  const fontSize = parseFloat(computedStyle["font-size"]);
  return fontSize || 16;
};

View File

@@ -0,0 +1,36 @@
import { useEffect, useRef } from "react";
/**
 * Hook that keeps a textarea exactly as tall as its content.
 *
 * Resizing works by collapsing the height to 0 first and then growing it back
 * to the element's scroll height.
 * Reference: https://stackoverflow.com/a/25621277/7699841
 *
 * The height is recomputed whenever `value` changes and whenever the window
 * is resized.
 *
 * @example // Tailwind CSS
 * const textareaRef = useAutosizeTextareaHeight({ value });
 * <textarea ref={textareaRef} className="resize-none overflow-hidden"/>
 */
export const useAutosizeTextareaHeight = ({ value }: { value: string }) => {
  const textareaRef = useRef<HTMLTextAreaElement>(null);

  function resizeHeight() {
    const element = textareaRef.current;
    // Nothing to do until the ref is attached to a mounted textarea
    if (!element) {
      return;
    }
    element.style.height = "0px";
    element.style.height = `${element.scrollHeight}px`;
  }

  // Content changed -> recompute height
  useEffect(() => {
    resizeHeight();
  }, [value]);

  // Viewport changed -> recompute height (listener removed on unmount)
  useEffect(() => {
    window.addEventListener("resize", resizeHeight);
    return () => {
      window.removeEventListener("resize", resizeHeight);
    };
  }, []);

  return textareaRef;
};

View File

@@ -0,0 +1,33 @@
import { useEffect, useState } from "react";
// Minimum viewport width for each breakpoint; useTailwindBreakpoints compares
// window.innerWidth against these values with `>=`.
// NOTE(review): the numbers look like Tailwind's default breakpoints in px - confirm against the Tailwind config.
const enum TailwindBreakpoint {
sm = 640,
md = 768,
lg = 1024,
xl = 1280,
"2xl" = 1536,
}
/**
 * Hook exposing, for each breakpoint, whether the viewport is at least that wide.
 *
 * All flags start as false, are computed once when the effect first runs and
 * are refreshed on every window resize.
 */
export const useTailwindBreakpoints = () => {
  const [isSm, setIsSm] = useState(false);
  const [isMd, setIsMd] = useState(false);
  const [isLg, setIsLg] = useState(false);
  const [isXl, setIsXl] = useState(false);
  const [is2xl, setIs2xl] = useState(false);

  useEffect(() => {
    function syncWithViewport() {
      const { innerWidth } = window;
      setIsSm(innerWidth >= TailwindBreakpoint.sm);
      setIsMd(innerWidth >= TailwindBreakpoint.md);
      setIsLg(innerWidth >= TailwindBreakpoint.lg);
      setIsXl(innerWidth >= TailwindBreakpoint.xl);
      setIs2xl(innerWidth >= TailwindBreakpoint["2xl"]);
    }

    // Initial measurement, then keep in sync with resizes
    syncWithViewport();
    window.addEventListener("resize", syncWithViewport);
    return () => {
      window.removeEventListener("resize", syncWithViewport);
    };
  }, []);

  return { isSm, isMd, isLg, isXl, is2xl };
};

View File

@@ -0,0 +1,60 @@
import { deepClone } from "lib/deep-clone";
type Object = { [key: string]: any };
/**
 * makeObjectCharIterator is a generator function that iterates a start object to
 * match an end object state by iterating through each string character.
 *
 * Each step yields a deep clone of the whole object with one more character
 * filled in. Keys are processed in the end object's entry order; nested
 * objects and arrays are walked recursively.
 *
 * Note: Start object and end object must have the same structure and same keys.
 * String values are typed out character by character, objects and arrays are
 * recursed into, and any other leaf (null, undefined, number, boolean) is left
 * at its start value without producing a step.
 *
 * @param start Initial state; it is deep cloned at the top level and never mutated.
 * @param end Target state.
 * @param level Recursion depth; callers should leave it at the default 0.
 *
 * @example
 * const start = {a : ""}
 * const end = {a : "abc"};
 * const iterator = makeObjectCharIterator(start, end);
 * iterator.next().value // {a : "a"}
 * iterator.next().value // {a : "ab"}
 * iterator.next().value // {a : "abc"}
 */
export function* makeObjectCharIterator<T extends Object>(
  start: T,
  end: T,
  level = 0
) {
  // Have to manually cast Object type and return T type due to https://github.com/microsoft/TypeScript/issues/47357
  const object: Object = level === 0 ? deepClone(start) : start;
  for (const [key, endValue] of Object.entries(end)) {
    // typeof null === "object", so null must be excluded explicitly: recursing
    // into it would throw in Object.entries
    if (typeof endValue === "object" && endValue !== null) {
      const recursiveIterator = makeObjectCharIterator(
        object[key],
        endValue,
        level + 1
      );
      // The nested iterator mutates object[key] in place; for each of its
      // steps, yield a fresh snapshot of the object at this level instead
      while (!recursiveIterator.next().done) {
        yield deepClone(object) as T;
      }
    } else if (typeof endValue === "string") {
      for (let i = 1; i <= endValue.length; i++) {
        object[key] = endValue.slice(0, i);
        yield deepClone(object) as T;
      }
    }
  }
}
/**
 * Counts the total number of characters across all string values of an
 * object, recursing into nested objects and arrays.
 *
 * Non-string leaves (numbers, booleans, null, undefined) contribute nothing.
 *
 * @param object Object whose string values are measured.
 * @returns Sum of the lengths of every string value found.
 */
export const countObjectChar = (object: Object): number => {
  let count = 0;
  for (const value of Object.values(object)) {
    if (typeof value === "string") {
      count += value.length;
    } else if (typeof value === "object" && value !== null) {
      // typeof null === "object": without the null check Object.values(null) would throw
      count += countObjectChar(value);
    }
  }
  return count;
};

View File

@@ -0,0 +1,122 @@
import type {
TextItem,
FeatureSet,
ResumeSectionToLines,
} from "lib/parse-resume-from-pdf/types";
import type { ResumeEducation } from "lib/redux/types";
import { getSectionLinesByKeywords } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/get-section-lines";
import { divideSectionIntoSubsections } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/subsections";
import {
DATE_FEATURE_SETS,
hasComma,
hasLetter,
hasNumber,
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/common-features";
import { getTextWithHighestFeatureScore } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/feature-scoring-system";
import {
getBulletPointsFromLines,
getDescriptionsLineIdx,
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/bullet-points";
/**
* Unique Attribute
* School Has school
* Degree Has degree
* GPA Has number
*/
// Substrings that indicate a text item names a school
// prettier-ignore
const SCHOOLS = ['College', 'University', 'Institute', 'School', 'Academy', 'BASIS', 'Magnet']
const hasSchool = ({ text }: TextItem) => {
  for (const school of SCHOOLS) {
    if (text.includes(school)) {
      return true;
    }
  }
  return false;
};

// Substrings that indicate a text item names a degree
// prettier-ignore
const DEGREES = ["Associate", "Bachelor", "Master", "PhD", "Ph."];
const hasDegree = ({ text }: TextItem) => {
  const mentionsDegreeName = DEGREES.some((degree) => text.includes(degree));
  if (mentionsDegreeName) {
    return true;
  }
  // Otherwise look for an abbreviation such as AA, B.S., MBA, etc.
  return /[ABM][A-Z\.]/.test(text);
};

// GPA: a digit 0-4, a period, then one or two digits (e.g. 3.85)
const matchGPA = ({ text }: TextItem) => text.match(/[0-4]\.\d{1,2}/);

// Grade: text that parses to a finite number no greater than 110; the parsed
// number is returned in a match-like array so it can be used as matching text
const matchGrade = ({ text }: TextItem) => {
  const grade = parseFloat(text);
  const isAcceptedGrade = Number.isFinite(grade) && grade <= 110;
  return isAcceptedGrade ? ([String(grade)] as RegExpMatchArray) : null;
};
// Each entry below is a [feature function, score, returnMatchingText?] tuple that is
// passed to getTextWithHighestFeatureScore in extractEducation.
// Positive scores favor a text item for the attribute, negative scores count against it.

// School: favors school keywords, penalizes degree-like text and digits.
const SCHOOL_FEATURE_SETS: FeatureSet[] = [
[hasSchool, 4],
[hasDegree, -4],
[hasNumber, -4],
];
// Degree: favors degree keywords, penalizes school-like text and digits.
const DEGREE_FEATURE_SETS: FeatureSet[] = [
[hasDegree, 4],
[hasSchool, -4],
[hasNumber, -3],
];
// GPA: the two match features set the third tuple element to true
// (returnMatchingText); commas and letters count against a candidate.
const GPA_FEATURE_SETS: FeatureSet[] = [
[matchGPA, 4, true],
[matchGrade, 3, true],
[hasComma, -3],
[hasLetter, -4],
];
/**
 * Extracts education entries from the section whose name contains "education".
 *
 * The section is divided into subsections; for each one the school, degree,
 * GPA and date are picked with the feature scoring system, and the lines from
 * the descriptions line onward (if any) become bullet point descriptions.
 * If a section whose name contains "course" exists, its text is appended to
 * the first education's descriptions with a "Courses: " prefix.
 *
 * @returns The extracted educations plus the per-attribute scores.
 */
export const extractEducation = (sections: ResumeSectionToLines) => {
  const educations: ResumeEducation[] = [];
  const educationsScores = [];
  const educationLines = getSectionLinesByKeywords(sections, ["education"]);

  for (const subsectionLines of divideSectionIntoSubsections(educationLines)) {
    const textItems = subsectionLines.flat();
    const pickBestText = (featureSets: FeatureSet[]) =>
      getTextWithHighestFeatureScore(textItems, featureSets);

    const [school, schoolScores] = pickBestText(SCHOOL_FEATURE_SETS);
    const [degree, degreeScores] = pickBestText(DEGREE_FEATURE_SETS);
    const [gpa, gpaScores] = pickBestText(GPA_FEATURE_SETS);
    const [date, dateScores] = pickBestText(DATE_FEATURE_SETS);

    // Descriptions are optional for an education entry
    const descriptionsLineIdx = getDescriptionsLineIdx(subsectionLines);
    const descriptions: string[] =
      descriptionsLineIdx === undefined
        ? []
        : getBulletPointsFromLines(subsectionLines.slice(descriptionsLineIdx));

    educations.push({ school, degree, gpa, date, descriptions });
    educationsScores.push({ schoolScores, degreeScores, gpaScores, dateScores });
  }

  // Attach a dedicated courses section (if any) to the first education
  if (educations.length > 0) {
    const coursesLines = getSectionLinesByKeywords(sections, ["course"]);
    if (coursesLines.length > 0) {
      const coursesText = coursesLines
        .flat()
        .map((item) => item.text)
        .join(" ");
      educations[0].descriptions.push("Courses: " + coursesText);
    }
  }

  return { educations, educationsScores };
};

View File

@@ -0,0 +1,187 @@
import type {
ResumeSectionToLines,
TextItem,
FeatureSet,
} from "lib/parse-resume-from-pdf/types";
import { getSectionLinesByKeywords } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/get-section-lines";
import {
isBold,
hasNumber,
hasComma,
hasLetter,
hasLetterAndIsAllUpperCase,
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/common-features";
import { getTextWithHighestFeatureScore } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/feature-scoring-system";
// Name
// Whole text consists of letters, whitespace and periods only
export const matchOnlyLetterSpaceOrPeriod = ({ text }: TextItem) =>
  text.match(/^[a-zA-Z\s\.]+$/);

// Email
// Simple email pattern: xxx@xxx.xxx (xxx = anything not space)
export const matchEmail = ({ text }: TextItem) => text.match(/\S+@\S+\.\S+/);
const hasAt = ({ text }: TextItem) => text.includes("@");

// Phone
// Simple phone pattern (xxx)-xxx-xxxx where () and - are optional, - can also be space
export const matchPhone = ({ text }: TextItem) =>
  text.match(/\(?\d{3}\)?[\s-]?\d{3}[\s-]?\d{4}/);
// Digits wrapped in parentheses, e.g. (123)
const hasParenthesis = ({ text }: TextItem) => /\([0-9]+\)/.test(text);

// Location
// Simple location pattern "<City>, <ST>"
export const matchCityAndState = ({ text }: TextItem) =>
  text.match(/[A-Z][a-zA-Z\s]+, [A-Z]{2}/);

// Url
// Simple url pattern "xxx.xxx/xxx" (xxx = anything not space)
export const matchUrl = ({ text }: TextItem) => text.match(/\S+\.[a-z]+\/\S+/);
// Fallback: http://xxx.xxx or https://xxx.xxx
const matchUrlHttpFallback = ({ text }: TextItem) =>
  text.match(/https?:\/\/\S+\.\S+/);
// Fallback: www.xxx.xxx
const matchUrlWwwFallback = ({ text }: TextItem) => text.match(/www\.\S+\.\S+/);
const hasSlash = ({ text }: TextItem) => text.includes("/");

// Summary
// Counts space-separated pieces of the text
const has4OrMoreWords = ({ text }: TextItem) => {
  const words = text.split(" ");
  return words.length >= 4;
};
/**
* Unique Attribute
* Name Bold or Has all uppercase letter
* Email Has @
* Phone Has ()
* Location Has , (overlap with summary)
* Url Has slash
* Summary Has 4 or more words
*/
// Each entry in the feature sets below is a [feature function, score, returnMatchingText?]
// tuple passed to getTextWithHighestFeatureScore in extractProfile. Positive scores favor a
// text item for the attribute; negative scores penalize items that look like another attribute
// (named in the trailing comment of each entry).
/**
* Name -> contains only letters/space/period, e.g. Leonardo W. DiCaprio
* (it isn't common to include middle initial in resume)
* -> is bolded or has all letters as uppercase
*/
const NAME_FEATURE_SETS: FeatureSet[] = [
[matchOnlyLetterSpaceOrPeriod, 3, true],
[isBold, 2],
[hasLetterAndIsAllUpperCase, 2],
// Match against other unique attributes
[hasAt, -4], // Email
[hasNumber, -4], // Phone
[hasParenthesis, -4], // Phone
[hasComma, -4], // Location
[hasSlash, -4], // Url
[has4OrMoreWords, -2], // Summary
];
// Email -> match email regex xxx@xxx.xxx
const EMAIL_FEATURE_SETS: FeatureSet[] = [
[matchEmail, 4, true],
[isBold, -1], // Name
[hasLetterAndIsAllUpperCase, -1], // Name
[hasParenthesis, -4], // Phone
[hasComma, -4], // Location
[hasSlash, -4], // Url
[has4OrMoreWords, -4], // Summary
];
// Phone -> match phone regex (xxx)-xxx-xxxx
const PHONE_FEATURE_SETS: FeatureSet[] = [
[matchPhone, 4, true],
[hasLetter, -4], // Name, Email, Location, Url, Summary
];
// Location -> match location regex <City>, <ST>
const LOCATION_FEATURE_SETS: FeatureSet[] = [
[matchCityAndState, 4, true],
[isBold, -1], // Name
[hasAt, -4], // Email
[hasParenthesis, -3], // Phone
[hasSlash, -4], // Url
];
// URL -> match url regex xxx.xxx/xxx
// The two fallback matchers score lower than the primary url regex.
const URL_FEATURE_SETS: FeatureSet[] = [
[matchUrl, 4, true],
[matchUrlHttpFallback, 3, true],
[matchUrlWwwFallback, 3, true],
[isBold, -1], // Name
[hasAt, -4], // Email
[hasParenthesis, -3], // Phone
[hasComma, -4], // Location
[has4OrMoreWords, -4], // Summary
];
// Summary -> has 4 or more words
// matchCityAndState is used here purely as a penalty, with returnMatchingText set to false.
const SUMMARY_FEATURE_SETS: FeatureSet[] = [
[has4OrMoreWords, 4],
[isBold, -1], // Name
[hasAt, -4], // Email
[hasParenthesis, -3], // Phone
[matchCityAndState, -4, false], // Location
];
/**
 * Extracts the profile attributes (name, email, phone, location, url, summary)
 * from the `profile` section using the feature scoring system.
 *
 * For the summary, all texts tied for the highest score are concatenated. A
 * section whose name contains "summary" - or, failing that, "objective" -
 * takes precedence over the summary found in the profile section.
 *
 * @returns The profile plus the per-attribute scores (for debugging).
 */
export const extractProfile = (sections: ResumeSectionToLines) => {
  const textItems = (sections.profile || []).flat();
  const pickBestText = (featureSets: FeatureSet[]) =>
    getTextWithHighestFeatureScore(textItems, featureSets);

  const [name, nameScores] = pickBestText(NAME_FEATURE_SETS);
  const [email, emailScores] = pickBestText(EMAIL_FEATURE_SETS);
  const [phone, phoneScores] = pickBestText(PHONE_FEATURE_SETS);
  const [location, locationScores] = pickBestText(LOCATION_FEATURE_SETS);
  const [url, urlScores] = pickBestText(URL_FEATURE_SETS);
  const [summary, summaryScores] = getTextWithHighestFeatureScore(
    textItems,
    SUMMARY_FEATURE_SETS,
    undefined,
    true
  );

  // Joins all text of the first section whose name contains the keyword
  const joinSectionText = (keyword: string) =>
    getSectionLinesByKeywords(sections, [keyword])
      .flat()
      .map((textItem) => textItem.text)
      .join(" ");
  const summarySection = joinSectionText("summary");
  const objectiveSection = joinSectionText("objective");

  const profile = {
    name,
    email,
    phone,
    location,
    url,
    // Dedicated section takes higher precedence over profile summary
    summary: summarySection || objectiveSection || summary,
  };
  // For debugging
  const profileScores = {
    name: nameScores,
    email: emailScores,
    phone: phoneScores,
    location: locationScores,
    url: urlScores,
    summary: summaryScores,
  };
  return { profile, profileScores };
};

View File

@@ -0,0 +1,55 @@
import type { ResumeProject } from "lib/redux/types";
import type {
FeatureSet,
ResumeSectionToLines,
} from "lib/parse-resume-from-pdf/types";
import { getSectionLinesByKeywords } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/get-section-lines";
import {
DATE_FEATURE_SETS,
getHasText,
isBold,
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/common-features";
import { divideSectionIntoSubsections } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/subsections";
import { getTextWithHighestFeatureScore } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/feature-scoring-system";
import {
getBulletPointsFromLines,
getDescriptionsLineIdx,
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/bullet-points";
/**
 * Extracts projects from the section whose name contains "project".
 *
 * Each subsection is split at the descriptions line (defaulting to line 1
 * when none is detected): the lines before it are scored for the project
 * date and name, the rest become bullet point descriptions.
 *
 * @returns The extracted projects plus the per-attribute scores.
 */
export const extractProject = (sections: ResumeSectionToLines) => {
  const projects: ResumeProject[] = [];
  const projectsScores = [];
  const projectLines = getSectionLinesByKeywords(sections, ["project"]);

  for (const subsectionLines of divideSectionIntoSubsections(projectLines)) {
    const descriptionsLineIdx = getDescriptionsLineIdx(subsectionLines) ?? 1;
    const infoLines = subsectionLines.slice(0, descriptionsLineIdx);
    const descriptionsLines = subsectionLines.slice(descriptionsLineIdx);
    const subsectionInfoTextItems = infoLines.flat();

    const [date, dateScores] = getTextWithHighestFeatureScore(
      subsectionInfoTextItems,
      DATE_FEATURE_SETS
    );

    // The project name is favored when bold and must not be the date text
    const projectFeatureSets: FeatureSet[] = [
      [isBold, 2],
      [getHasText(date), -4],
    ];
    const [project, projectScores] = getTextWithHighestFeatureScore(
      subsectionInfoTextItems,
      projectFeatureSets,
      false
    );

    const descriptions = getBulletPointsFromLines(descriptionsLines);

    projects.push({ project, date, descriptions });
    projectsScores.push({ projectScores, dateScores });
  }

  return { projects, projectsScores };
};

View File

@@ -0,0 +1,39 @@
import {
matchOnlyLetterSpaceOrPeriod,
matchEmail,
matchPhone,
matchUrl,
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/extract-profile";
import type { TextItem } from "lib/parse-resume-from-pdf/types";
// Builds a TextItem carrying only the text field, which is all the matchers read.
const makeTextItem = (text: string) => ({ text } as TextItem);

describe("extract-profile tests - ", () => {
  it("Name", () => {
    const match = matchOnlyLetterSpaceOrPeriod(
      makeTextItem("Leonardo W. DiCaprio")
    );
    expect(match![0]).toBe("Leonardo W. DiCaprio");
  });

  it("Email", () => {
    const match = matchEmail(makeTextItem(" hello@open-resume.org "));
    expect(match![0]).toBe("hello@open-resume.org");
  });

  it("Phone", () => {
    const match = matchPhone(makeTextItem(" (123)456-7890 "));
    expect(match![0]).toBe("(123)456-7890");
  });

  it("Url", () => {
    const match = matchUrl(makeTextItem(" linkedin.com/in/open-resume "));
    expect(match![0]).toBe("linkedin.com/in/open-resume");
    // An email address must not be mistaken for a url
    expect(matchUrl(makeTextItem("hello@open-resume.org"))).toBeFalsy();
  });
});

View File

@@ -0,0 +1,35 @@
import type { ResumeSkills } from "lib/redux/types";
import type { ResumeSectionToLines } from "lib/parse-resume-from-pdf/types";
import { deepClone } from "lib/deep-clone";
import { getSectionLinesByKeywords } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/get-section-lines";
import { initialFeaturedSkills } from "lib/redux/resumeSlice";
import {
getBulletPointsFromLines,
getDescriptionsLineIdx,
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/bullet-points";
/**
 * Extracts skills from the section whose name contains "skill".
 *
 * Lines from the descriptions line onward (defaulting to line 0) become
 * bullet point descriptions. When the descriptions don't start at line 0,
 * the first 6 non-blank text items before them fill the featured skills,
 * which start out as a copy of initialFeaturedSkills.
 */
export const extractSkills = (sections: ResumeSectionToLines) => {
  const lines = getSectionLinesByKeywords(sections, ["skill"]);
  const descriptionsLineIdx = getDescriptionsLineIdx(lines) ?? 0;
  const descriptions = getBulletPointsFromLines(lines.slice(descriptionsLineIdx));

  const featuredSkills = deepClone(initialFeaturedSkills);
  if (descriptionsLineIdx !== 0) {
    lines
      .slice(0, descriptionsLineIdx)
      .flat()
      .filter((item) => item.text.trim())
      .slice(0, 6)
      .forEach((item, i) => {
        featuredSkills[i].skill = item.text;
      });
  }

  const skills: ResumeSkills = { featuredSkills, descriptions };
  return { skills };
};

View File

@@ -0,0 +1,83 @@
import type { ResumeWorkExperience } from "lib/redux/types";
import type {
TextItem,
FeatureSet,
ResumeSectionToLines,
} from "lib/parse-resume-from-pdf/types";
import { getSectionLinesByKeywords } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/get-section-lines";
import {
DATE_FEATURE_SETS,
hasNumber,
getHasText,
isBold,
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/common-features";
import { divideSectionIntoSubsections } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/subsections";
import { getTextWithHighestFeatureScore } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/feature-scoring-system";
import {
getBulletPointsFromLines,
getDescriptionsLineIdx,
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/bullet-points";
// Lowercase keywords used to find the work experience section by name
// prettier-ignore
const WORK_EXPERIENCE_KEYWORDS_LOWERCASE = ['work', 'experience', 'employment', 'history', 'job'];
// Words that identify a job title when they appear as a whole word
// prettier-ignore
const JOB_TITLES = ['Accountant', 'Administrator', 'Advisor', 'Agent', 'Analyst', 'Apprentice', 'Architect', 'Assistant', 'Associate', 'Auditor', 'Bartender', 'Biologist', 'Bookkeeper', 'Buyer', 'Carpenter', 'Cashier', 'CEO', 'Clerk', 'Co-op', 'Co-Founder', 'Consultant', 'Coordinator', 'CTO', 'Developer', 'Designer', 'Director', 'Driver', 'Editor', 'Electrician', 'Engineer', 'Extern', 'Founder', 'Freelancer', 'Head', 'Intern', 'Janitor', 'Journalist', 'Laborer', 'Lawyer', 'Lead', 'Manager', 'Mechanic', 'Member', 'Nurse', 'Officer', 'Operator', 'Operation', 'Photographer', 'President', 'Producer', 'Recruiter', 'Representative', 'Researcher', 'Sales', 'Server', 'Scientist', 'Specialist', 'Supervisor', 'Teacher', 'Technician', 'Trader', 'Trainee', 'Treasurer', 'Tutor', 'Vice', 'VP', 'Volunteer', 'Webmaster', 'Worker'];
// True when any whitespace-separated word of the text is exactly a known job title
const hasJobTitle = (item: TextItem) => {
  const words = item.text.split(/\s/);
  return words.some((word) => JOB_TITLES.includes(word));
};
// Counts the pieces produced by splitting the text on single whitespace characters
const hasMoreThan5Words = (item: TextItem) => {
  const words = item.text.split(/\s/);
  return words.length > 5;
};
// Scoring rules for picking the job title of a work experience subsection.
// Each entry is a [feature function, score] tuple: a known job title word is
// rewarded, while digits and texts longer than 5 words are penalized.
const JOB_TITLE_FEATURE_SET: FeatureSet[] = [
[hasJobTitle, 4],
[hasNumber, -4],
[hasMoreThan5Words, -2],
];
/**
 * Extracts work experiences from the first section whose name contains one of
 * the work experience keywords.
 *
 * Each subsection is split at the descriptions line (defaulting to line 2
 * when none is detected): the lines before it are scored for date, job title
 * and company, the rest become bullet point descriptions.
 *
 * @returns The extracted work experiences plus the per-attribute scores.
 */
export const extractWorkExperience = (sections: ResumeSectionToLines) => {
  const workExperiences: ResumeWorkExperience[] = [];
  const workExperiencesScores = [];
  const workLines = getSectionLinesByKeywords(
    sections,
    WORK_EXPERIENCE_KEYWORDS_LOWERCASE
  );

  for (const subsectionLines of divideSectionIntoSubsections(workLines)) {
    const descriptionsLineIdx = getDescriptionsLineIdx(subsectionLines) ?? 2;
    const infoLines = subsectionLines.slice(0, descriptionsLineIdx);
    const subsectionDescriptionsLines = subsectionLines.slice(descriptionsLineIdx);
    const subsectionInfoTextItems = infoLines.flat();

    const [date, dateScores] = getTextWithHighestFeatureScore(
      subsectionInfoTextItems,
      DATE_FEATURE_SETS
    );
    const [jobTitle, jobTitleScores] = getTextWithHighestFeatureScore(
      subsectionInfoTextItems,
      JOB_TITLE_FEATURE_SET
    );

    // The company is favored when bold and must be neither the date nor the job title
    const companyFeatureSets: FeatureSet[] = [
      [isBold, 2],
      [getHasText(date), -4],
      [getHasText(jobTitle), -4],
    ];
    const [company, companyScores] = getTextWithHighestFeatureScore(
      subsectionInfoTextItems,
      companyFeatureSets,
      false
    );

    const descriptions = getBulletPointsFromLines(subsectionDescriptionsLines);

    workExperiences.push({ company, jobTitle, date, descriptions });
    workExperiencesScores.push({ companyScores, jobTitleScores, dateScores });
  }

  return { workExperiences, workExperiencesScores };
};

View File

@@ -0,0 +1,42 @@
import type { Resume } from "lib/redux/types";
import type { ResumeSectionToLines } from "lib/parse-resume-from-pdf/types";
import { extractProfile } from "lib/parse-resume-from-pdf/extract-resume-from-sections/extract-profile";
import { extractEducation } from "lib/parse-resume-from-pdf/extract-resume-from-sections/extract-education";
import { extractWorkExperience } from "lib/parse-resume-from-pdf/extract-resume-from-sections/extract-work-experience";
import { extractProject } from "lib/parse-resume-from-pdf/extract-resume-from-sections/extract-project";
import { extractSkills } from "lib/parse-resume-from-pdf/extract-resume-from-sections/extract-skills";
/**
 * Step 4. Extract resume from sections.
 *
 * This is the core of the resume parser: it extracts the resume information
 * from the sections.
 *
 * The extraction engine is built on a feature scoring system. Every resume
 * attribute to extract has its own feature sets, each made of a feature
 * matching function and the score (positive or negative) awarded on a match.
 * A text item's final score for an attribute is the sum of the scores of all
 * feature sets it matches. All text items within the section are scored this
 * way, and the one with the highest score is taken as the extracted attribute.
 *
 * The custom section is always returned with empty descriptions.
 */
export const extractResumeFromSections = (
  sections: ResumeSectionToLines
): Resume => {
  const profile = extractProfile(sections).profile;
  const educations = extractEducation(sections).educations;
  const workExperiences = extractWorkExperience(sections).workExperiences;
  const projects = extractProject(sections).projects;
  const skills = extractSkills(sections).skills;

  const resume: Resume = {
    profile,
    educations,
    workExperiences,
    projects,
    skills,
    custom: {
      descriptions: [],
    },
  };
  return resume;
};

View File

@@ -0,0 +1,123 @@
import type { Lines, TextItem } from "lib/parse-resume-from-pdf/types";
/**
* List of bullet points
* Reference: https://stackoverflow.com/questions/56540160/why-isnt-there-a-medium-small-black-circle-in-unicode
* U+22C5 DOT OPERATOR (⋅)
* U+2219 BULLET OPERATOR (∙)
* U+1F784 BLACK SLIGHTLY SMALL CIRCLE (🞄)
* U+2022 BULLET (•) -------- most common
* U+2981 Z NOTATION SPOT (⦁)
* U+26AB MEDIUM BLACK CIRCLE (⚫︎)
* U+25CF BLACK CIRCLE (●)
* U+2B24 BLACK LARGE CIRCLE (⬤)
* U+26AC MEDIUM SMALL WHITE CIRCLE
* U+25CB WHITE CIRCLE
*
* The helpers in this file detect a bullet point line by checking whether a text
* item includes any of these entries. The first entry doubles as the default
* returned by getMostCommonBulletPoint when no bullet is counted.
*/
export const BULLET_POINTS = [
"⋅",
"∙",
"🞄",
"•",
"⦁",
"⚫︎",
"●",
"⬤",
"⚬",
"○",
];
/**
 * Convert bullet point lines into a string array aka descriptions.
 *
 * Without any bullet point, each line simply becomes one description (its text
 * items joined by a space). Otherwise all text is combined into one string,
 * everything before the first occurrence of the most common bullet point is
 * dropped, and the remainder is split on that bullet point.
 */
export const getBulletPointsFromLines = (lines: Lines): string[] => {
  const hasBulletPoint = getFirstBulletPointLineIdx(lines) !== undefined;
  if (!hasBulletPoint) {
    return lines.map((line) => line.map((item) => item.text).join(" "));
  }

  // Combine every text item into one string, inserting a space wherever
  // neither side of the seam already provides one
  const combined = lines.flat().reduce((acc, { text }) => {
    const needsSpace = !acc.endsWith(" ") && !text.startsWith(" ");
    return acc + (needsSpace ? " " : "") + text;
  }, "");

  const commonBulletPoint = getMostCommonBulletPoint(combined);

  // Drop whatever precedes the first bullet point (if it occurs at all)
  const firstBulletPointIndex = combined.indexOf(commonBulletPoint);
  const fromFirstBullet =
    firstBulletPointIndex === -1
      ? combined
      : combined.slice(firstBulletPointIndex);

  // The bullet point acts as the divider between descriptions
  const descriptions: string[] = [];
  for (const piece of fromFirstBullet.split(commonBulletPoint)) {
    const trimmed = piece.trim();
    if (trimmed) {
      descriptions.push(trimmed);
    }
  }
  return descriptions;
};
/**
 * Returns the entry of BULLET_POINTS that occurs most often in `str`.
 *
 * Occurrences are counted per bullet by splitting the string on it, which
 * also works for bullets made of more than one code point. Ties go to the
 * bullet listed first in BULLET_POINTS; when no bullet occurs at all, the
 * first entry of BULLET_POINTS is returned.
 *
 * @param str Text to scan for bullet points.
 * @returns The most common bullet point.
 */
const getMostCommonBulletPoint = (str: string): string => {
  let bulletWithMostCount = BULLET_POINTS[0];
  let bulletMaxCount = 0;
  for (const bullet of BULLET_POINTS) {
    const count = str.split(bullet).length - 1;
    if (count > bulletMaxCount) {
      // Track the running maximum so a later, rarer bullet can't take over
      bulletMaxCount = count;
      bulletWithMostCount = bullet;
    }
  }
  return bulletWithMostCount;
};
/**
 * Returns the index of the first line containing a text item that includes
 * any of the BULLET_POINTS, or undefined when no line does.
 */
const getFirstBulletPointLineIdx = (lines: Lines): number | undefined => {
  const lineIdx = lines.findIndex((line) =>
    line.some((item) =>
      BULLET_POINTS.some((bullet) => item.text.includes(bullet))
    )
  );
  return lineIdx === -1 ? undefined : lineIdx;
};
// A word is a non-empty string without any digit
const isWord = (str: string) => /^[^0-9]+$/.test(str);
// True when the text has 8 or more whitespace-separated words (see isWord)
const hasAtLeast8Words = (item: TextItem) => {
  let wordCount = 0;
  for (const piece of item.text.split(/\s/)) {
    if (isWord(piece)) {
      wordCount += 1;
    }
  }
  return wordCount >= 8;
};
/**
 * Finds the index of the line where descriptions start.
 *
 * The main heuristic is the first line containing a bullet point. If there is
 * none (e.g. LinkedIn resume), it falls back to the first line that consists
 * of a single text item with at least 8 words. Returns undefined when neither
 * heuristic finds a line.
 */
export const getDescriptionsLineIdx = (lines: Lines): number | undefined => {
  const bulletPointLineIdx = getFirstBulletPointLineIdx(lines);
  if (bulletPointLineIdx !== undefined) {
    return bulletPointLineIdx;
  }

  const longTextLineIdx = lines.findIndex(
    (line) => line.length === 1 && hasAtLeast8Words(line[0])
  );
  return longTextLineIdx === -1 ? undefined : longTextLineIdx;
};

View File

@@ -0,0 +1,35 @@
import type { TextItem, FeatureSet } from "lib/parse-resume-from-pdf/types";
// A font counts as bold when its name contains "bold" (case-insensitive)
const isTextItemBold = (fontName: string) => {
  const lowerCasedFontName = fontName.toLowerCase();
  return lowerCasedFontName.includes("bold");
};
export const isBold = ({ fontName }: TextItem) => isTextItemBold(fontName);
// Contains at least one ASCII letter
export const hasLetter = ({ text }: TextItem) => /[a-zA-Z]/.test(text);
// Contains at least one digit
export const hasNumber = ({ text }: TextItem) => /[0-9]/.test(text);
export const hasComma = ({ text }: TextItem) => text.includes(",");
// Builds a feature function checking whether an item's text includes `text`
export const getHasText = (text: string) => {
  return (item: TextItem) => item.text.includes(text);
};
// Whole text consists of ASCII letters, whitespace and ampersands only
export const hasOnlyLettersSpacesAmpersands = ({ text }: TextItem) =>
  /^[A-Za-z\s&]+$/.test(text);
// Has a letter, and upper-casing the text leaves it unchanged
export const hasLetterAndIsAllUpperCase = (item: TextItem) => {
  if (!hasLetter(item)) {
    return false;
  }
  return item.text.toUpperCase() === item.text;
};
// Date Features
// A 4-digit sequence starting with 19 or 20
const hasYear = ({ text }: TextItem) => /(?:19|20)\d{2}/.test(text);
// prettier-ignore
const MONTHS = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'];
// Contains a full month name or the first 4 characters of one
const hasMonth = ({ text }: TextItem) => {
  for (const month of MONTHS) {
    if (text.includes(month) || text.includes(month.slice(0, 4))) {
      return true;
    }
  }
  return false;
};
const SEASONS = ["Summer", "Fall", "Spring", "Winter"];
const hasSeason = ({ text }: TextItem) => {
  for (const season of SEASONS) {
    if (text.includes(season)) {
      return true;
    }
  }
  return false;
};
const hasPresent = ({ text }: TextItem) => text.includes("Present");
// Scoring rules for picking the text item that holds a date.
// Each entry is a [feature function, score] tuple: a year, month, season or
// "Present" each add 1, while a comma subtracts 1.
export const DATE_FEATURE_SETS: FeatureSet[] = [
[hasYear, 1],
[hasMonth, 1],
[hasSeason, 1],
[hasPresent, 1],
[hasComma, -1],
];

View File

@@ -0,0 +1,79 @@
import type {
TextItems,
TextScores,
FeatureSet,
} from "lib/parse-resume-from-pdf/types";
/**
 * Scores every text item against every feature set.
 *
 * The result starts with one entry per text item (score 0, match false), in
 * the same order as `textItems`. For each feature that matches an item:
 * - if the feature asks for the matching text and the match result's first
 *   element differs from the item's full text, a separate entry is appended
 *   for that matched text with the feature's score and match set to true;
 * - otherwise the score is added to the item's own entry, which is flagged as
 *   a match when the feature asks for the matching text.
 */
const computeFeatureScores = (
  textItems: TextItems,
  featureSets: FeatureSet[]
): TextScores => {
  const textScores = textItems.map((item) => ({
    text: item.text,
    score: 0,
    match: false,
  }));

  textItems.forEach((textItem, idx) => {
    const ownScore = textScores[idx];
    for (const [hasFeature, score, returnMatchingText] of featureSets) {
      const result = hasFeature(textItem);
      if (!result) {
        continue;
      }

      const matchedText =
        returnMatchingText && typeof result === "object"
          ? result[0]
          : textItem.text;

      if (matchedText !== textItem.text) {
        // Only part of the item matched: score the matched part on its own
        textScores.push({ text: matchedText, score, match: true });
        continue;
      }

      ownScore.score += score;
      if (returnMatchingText) {
        ownScore.match = true;
      }
    }
  });

  return textScores;
};
/**
 * Core util for the feature scoring system.
 *
 * Every text item is run through all feature sets and the matching feature
 * scores are summed up; the text with the highest score wins.
 *
 * @param textItems Text items to score.
 * @param featureSets Feature sets to score them with.
 * @param returnEmptyStringIfHighestScoreIsNotPositive When true (default), ""
 * is returned if the highest score is 0 or less.
 * @param returnConcatenatedStringForTextsWithSameHighestScore When true, all
 * texts sharing the highest score are trimmed and joined with a space instead
 * of returning only the first one.
 * @returns A [text, textScores] tuple.
 */
export const getTextWithHighestFeatureScore = (
  textItems: TextItems,
  featureSets: FeatureSet[],
  returnEmptyStringIfHighestScoreIsNotPositive = true,
  returnConcatenatedStringForTextsWithSameHighestScore = false
) => {
  const textScores = computeFeatureScores(textItems, featureSets);

  // Collect every text tied for the highest score, in encounter order
  let highestScore = -Infinity;
  let textsWithHighestFeatureScore: string[] = [];
  for (const { text, score } of textScores) {
    if (!(score >= highestScore)) {
      continue;
    }
    if (score > highestScore) {
      textsWithHighestFeatureScore = [];
      highestScore = score;
    }
    textsWithHighestFeatureScore.push(text);
  }

  if (returnEmptyStringIfHighestScoreIsNotPositive && highestScore <= 0) {
    return ["", textScores] as const;
  }

  let text: string;
  if (returnConcatenatedStringForTextsWithSameHighestScore) {
    text = textsWithHighestFeatureScore.map((s) => s.trim()).join(" ");
  } else {
    // Note: If textItems is an empty array, the first element is undefined, so we default it to empty string
    text = textsWithHighestFeatureScore[0] ?? "";
  }
  return [text, textScores] as const;
};

View File

@@ -0,0 +1,19 @@
import type { ResumeSectionToLines } from "lib/parse-resume-from-pdf/types";
/**
 * Return the lines of the first section whose lower-cased name contains any of
 * the keywords, or an empty array when no section matches.
 */
export const getSectionLinesByKeywords = (
  sections: ResumeSectionToLines,
  keywords: string[]
) => {
  const matchingSectionName = Object.keys(sections).find((sectionName) => {
    const lowerCasedName = sectionName.toLowerCase();
    return keywords.some((keyword) => lowerCasedName.includes(keyword));
  });
  return matchingSectionName === undefined ? [] : sections[matchingSectionName];
};

View File

@@ -0,0 +1,93 @@
import { BULLET_POINTS } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/bullet-points";
import { isBold } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/common-features";
import type { Lines, Line, Subsections } from "lib/parse-resume-from-pdf/types";
/**
 * Divide lines into subsections based on difference in line gap or bold text.
 *
 * The profile section can be passed to the feature scoring system as a whole.
 * Other sections, such as education and work experience, can hold several
 * schools or jobs, so they are first divided into subsections. The feature
 * scoring system then processes each subsection separately.
 */
export const divideSectionIntoSubsections = (lines: Lines): Subsections => {
  // Main heuristic: a new subsection starts when the vertical gap to the
  // previous line is larger than the typical line gap * 1.4
  const subsectionsByLineGap = createSubsections(
    lines,
    createIsLineNewSubsectionByLineGap(lines)
  );
  if (subsectionsByLineGap.length !== 1) {
    return subsectionsByLineGap;
  }

  // Fallback heuristic when the line gap found a single subsection: a new
  // subsection starts where a line begins with bold text after a non-bold line
  const isLineNewSubsectionByBold = (line: Line, prevLine: Line) => {
    const [firstItem] = line;
    const [prevFirstItem] = prevLine;
    return (
      !isBold(prevFirstItem) &&
      isBold(firstItem) &&
      // Ignore bullet points, which are sometimes marked as bold
      !BULLET_POINTS.includes(firstItem.text)
    );
  };
  return createSubsections(lines, isLineNewSubsectionByBold);
};
type IsLineNewSubsection = (line: Line, prevLine: Line) => boolean;
/**
 * Builds a predicate that reports whether a line starts a new subsection,
 * judged by its vertical gap to the previous line.
 *
 * The typical gap is the most frequent rounded y-distance between the first
 * items of consecutive lines (the first gap to reach the highest count wins).
 * A line starts a new subsection when its gap exceeds 1.4 times that value.
 */
const createIsLineNewSubsectionByLineGap = (
  lines: Lines
): IsLineNewSubsection => {
  const firstItemYs = lines.map(([firstItem]) => firstItem.y);
  const gapCounts = new Map<number, number>();
  let typicalGap = 0;
  let typicalGapCount = 0;

  for (const [idx, y] of firstItemYs.entries()) {
    if (idx === 0) continue;
    const gap = Math.round(firstItemYs[idx - 1] - y);
    const count = (gapCounts.get(gap) ?? 0) + 1;
    gapCounts.set(gap, count);
    if (count > typicalGapCount) {
      typicalGap = gap;
      typicalGapCount = count;
    }
  }

  // Anything noticeably larger than the typical gap separates subsections
  const threshold = typicalGap * 1.4;
  return (line: Line, prevLine: Line) =>
    Math.round(prevLine[0].y - line[0].y) > threshold;
};
/**
 * Splits lines into consecutive groups. A new group begins at every line
 * (other than the first) for which `isLineNewSubsection(line, previousLine)`
 * returns true. Returns an empty array when there are no lines.
 */
const createSubsections = (
  lines: Lines,
  isLineNewSubsection: IsLineNewSubsection
): Subsections => {
  const groups: Subsections = [];
  let currentGroup: Lines = [];

  lines.forEach((line, idx) => {
    const startsNewGroup = idx > 0 && isLineNewSubsection(line, lines[idx - 1]);
    if (startsNewGroup) {
      groups.push(currentGroup);
      currentGroup = [];
    }
    currentGroup.push(line);
  });

  if (currentGroup.length > 0) {
    groups.push(currentGroup);
  }
  return groups;
};

View File

@@ -0,0 +1,100 @@
import type { ResumeKey } from "lib/redux/types";
import type {
Line,
Lines,
ResumeSectionToLines,
} from "lib/parse-resume-from-pdf/types";
import {
hasLetterAndIsAllUpperCase,
hasOnlyLettersSpacesAmpersands,
isBold,
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/common-features";
export const PROFILE_SECTION: ResumeKey = "profile";
/**
 * Step 3. Group lines into sections
 *
 * Every section (except the profile section) starts with a section title that
 * takes up the entire line. Lines are collected under the closest section
 * title above them; lines before the first title go to the profile section.
 * The title line itself is not included in the section's lines.
 */
export const groupLinesIntoSections = (lines: Lines) => {
  const sections: ResumeSectionToLines = {};
  let currentSectionName: string = PROFILE_SECTION;
  let currentSectionLines: Lines = [];

  lines.forEach((line, lineNumber) => {
    if (!isSectionTitle(line, lineNumber)) {
      currentSectionLines.push(line);
      return;
    }
    // A title closes the section collected so far and opens a new one
    sections[currentSectionName] = currentSectionLines;
    currentSectionName = line[0]?.text.trim();
    currentSectionLines = [];
  });

  // The trailing section is only recorded when it has at least one line
  if (currentSectionLines.length > 0) {
    sections[currentSectionName] = currentSectionLines;
  }
  return sections;
};
// Keywords for the most common resume section titles.
const SECTION_TITLE_PRIMARY_KEYWORDS = [
  "experience",
  "education",
  "project",
  "skill",
];
// Keywords for less common section titles. "project" is intentionally not
// repeated here since it is already a primary keyword.
const SECTION_TITLE_SECONDARY_KEYWORDS = [
  "job",
  "course",
  "extracurricular",
  "objective",
  "summary", // LinkedIn generated resume has a summary section
  "award",
  "honor",
];
// All lower-case keywords used by the fallback section title heuristic.
const SECTION_TITLE_KEYWORDS = [
  ...SECTION_TITLE_PRIMARY_KEYWORDS,
  ...SECTION_TITLE_SECONDARY_KEYWORDS,
];
/**
 * Decides whether a line is a section title.
 *
 * The first two lines are never titles, and a title must be the only text
 * item in its line.
 */
const isSectionTitle = (line: Line, lineNumber: number) => {
  if (lineNumber < 2 || line.length !== 1) {
    return false;
  }
  const [textItem] = line;

  // Main heuristic: the text is double emphasized, i.e. both bold and all
  // uppercase, which is generally true for a well formatted resume
  if (isBold(textItem) && hasLetterAndIsAllUpperCase(textItem)) {
    return true;
  }

  // Fallback heuristic: a short capitalized text that includes a known keyword
  // (This heuristic is not well tested and may not work well)
  const text = textItem.text.trim();
  const wordCount = text.split(" ").filter((word) => word !== "&").length;
  return (
    wordCount <= 2 &&
    hasOnlyLettersSpacesAmpersands(textItem) &&
    /[A-Z]/.test(text.slice(0, 1)) &&
    SECTION_TITLE_KEYWORDS.some((keyword) =>
      text.toLowerCase().includes(keyword)
    )
  );
};

View File

@@ -0,0 +1,131 @@
import { BULLET_POINTS } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/bullet-points";
import type { TextItems, Line, Lines } from "lib/parse-resume-from-pdf/types";
/**
 * Step 2: Group text items into lines. This returns an array where each position
 * contains text items in the same line of the pdf file.
 *
 * Items are copied before being stored, so the input items are not mutated.
 */
export const groupTextItemsIntoLines = (textItems: TextItems): Lines => {
  const lines: Lines = [];

  // Collect non-blank items into the pending line; an item flagged with
  // hasEOL closes the pending line (even if that line ends up empty)
  let pendingLine: Line = [];
  textItems.forEach((item) => {
    const isBlank = item.text.trim() === "";
    if (!isBlank) {
      pendingLine.push({ ...item });
    }
    if (item.hasEOL) {
      lines.push(pendingLine);
      pendingLine = [];
    }
  });
  // Keep whatever is left over after the last end-of-line marker
  if (pendingLine.length > 0) {
    lines.push(pendingLine);
  }

  // Many pdf docs are not well formatted, e.g. due to converting from other docs.
  // This creates noise where a single text item is divided into multiple ones.
  // Merge adjacent text items whose distance is no larger than a typical char width.
  const maxMergeDistance = getTypicalCharWidth(lines.flat());
  lines.forEach((row) => {
    // Walk right to left so removing an item doesn't shift the ones still to visit
    let idx = row.length - 1;
    while (idx > 0) {
      const rightItem = row[idx];
      const leftItem = row[idx - 1];
      const gap = rightItem.x - (leftItem.x + leftItem.width);
      if (gap <= maxMergeDistance) {
        const separator = shouldAddSpaceBetweenText(
          leftItem.text,
          rightItem.text
        )
          ? " "
          : "";
        leftItem.text += separator + rightItem.text;
        // Stretch the left item so it also covers the merged right item
        leftItem.width = rightItem.x + rightItem.width - leftItem.x;
        row.splice(idx, 1);
      }
      idx -= 1;
    }
  });

  return lines;
};
// Sometimes a space is lost while merging adjacent text items. This accounts for
// some of those cases: a space is added after a trailing ":", ",", "|", "." or
// bullet point, and before a leading "|" or bullet point, unless a space is
// already there.
const shouldAddSpaceBetweenText = (leftText: string, rightText: string) => {
  const lastCharOfLeft = leftText[leftText.length - 1];
  const firstCharOfRight = rightText[0];

  const leftEndsWithSeparator = [":", ",", "|", ".", ...BULLET_POINTS].includes(
    lastCharOfLeft
  );
  if (leftEndsWithSeparator && firstCharOfRight !== " ") {
    return true;
  }

  const rightStartsWithDivider = ["|", ...BULLET_POINTS].includes(
    firstCharOfRight
  );
  return lastCharOfLeft !== " " && rightStartsWithDivider;
};
/**
 * Return the width of a typical character. (Helper util for groupTextItemsIntoLines)
 *
 * A pdf file uses different characters, each with different width due to different
 * font family and font size. This util first finds the most common font height
 * (counted per item) and the most common font name (counted per character), then
 * computes the average character width over the items that match both.
 *
 * The two "most common" values are picked independently, so it is possible that
 * no item matches both. In that case the average is taken over all non-blank
 * items instead of dividing 0 by 0.
 *
 * @returns the average character width, or 0 when there is no non-blank text.
 */
const getTypicalCharWidth = (textItems: TextItems): number => {
  // Exclude empty space " " in calculations since its width isn't precise
  const nonBlankItems = textItems.filter((item) => item.text.trim() !== "");
  if (nonBlankItems.length === 0) {
    return 0;
  }

  const heightToCount = new Map<number, number>();
  let commonHeight = 0;
  let heightMaxCount = 0;

  const fontNameToCount = new Map<string, number>();
  let commonFontName = "";
  let fontNameMaxCount = 0;

  for (const { text, height, fontName } of nonBlankItems) {
    // Heights are counted once per text item
    const heightCount = (heightToCount.get(height) ?? 0) + 1;
    heightToCount.set(height, heightCount);
    if (heightCount > heightMaxCount) {
      commonHeight = height;
      heightMaxCount = heightCount;
    }

    // Font names are weighted by the number of characters using them
    const fontNameCount = (fontNameToCount.get(fontName) ?? 0) + text.length;
    fontNameToCount.set(fontName, fontNameCount);
    if (fontNameCount > fontNameMaxCount) {
      commonFontName = fontName;
      fontNameMaxCount = fontNameCount;
    }
  }

  // Prefer the text items that match both the common font name and height
  const commonTextItems = nonBlankItems.filter(
    (item) => item.fontName === commonFontName && item.height === commonHeight
  );
  const itemsToMeasure =
    commonTextItems.length > 0 ? commonTextItems : nonBlankItems;

  // Aggregate total width and number of characters of the measured items
  let totalWidth = 0;
  let numChars = 0;
  for (const item of itemsToMeasure) {
    totalWidth += item.width;
    numChars += item.text.length;
  }

  return numChars > 0 ? totalWidth / numChars : 0;
};

View File

@@ -0,0 +1,25 @@
import { readPdf } from "lib/parse-resume-from-pdf/read-pdf";
import { groupTextItemsIntoLines } from "lib/parse-resume-from-pdf/group-text-items-into-lines";
import { groupLinesIntoSections } from "lib/parse-resume-from-pdf/group-lines-into-sections";
import { extractResumeFromSections } from "lib/parse-resume-from-pdf/extract-resume-from-sections";
/**
 * Resume parser util that parses a resume from a resume pdf file
 *
 * Note: The parser algorithm only works for single column resume in English language
 */
export const parseResumeFromPdf = async (fileUrl: string) => {
  // Step 1. Read the pdf resume file into text items
  const textItems = await readPdf(fileUrl);

  // Step 2 & 3. Group text items into lines, then lines into sections
  const sections = groupLinesIntoSections(groupTextItemsIntoLines(textItems));

  // Step 4. Extract resume from sections
  return extractResumeFromSections(sections);
};

View File

@@ -0,0 +1,89 @@
// Getting pdfjs to work is tricky. The following 3 lines would make it work
// https://stackoverflow.com/a/63486898/7699841
import * as pdfjs from "pdfjs-dist";
// @ts-ignore
import pdfjsWorker from "pdfjs-dist/build/pdf.worker.entry";
pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorker;
import type { TextItem as PdfjsTextItem } from "pdfjs-dist/types/src/display/api";
import type { TextItem, TextItems } from "lib/parse-resume-from-pdf/types";
/**
 * Step 1: Read pdf and output textItems by concatenating results from each page.
 *
 * To make processing easier, it returns a new TextItem type, which removes unused
 * attributes (dir, transform), adds x and y positions, and replaces loaded font
 * name with original font name.
 *
 * Text items that are only whitespace are dropped, unless they carry the
 * end-of-line flag (hasEOL).
 *
 * @param fileUrl url of the pdf file to read
 * @returns the text items of all pages, in page order
 *
 * @example
 * const onFileChange = async (e) => {
 *     const fileUrl = URL.createObjectURL(e.target.files[0]);
 *     const textItems = await readPdf(fileUrl);
 * }
 */
export const readPdf = async (fileUrl: string): Promise<TextItems> => {
  const pdfFile = await pdfjs.getDocument(fileUrl).promise;
  let textItems: TextItems = [];

  for (let i = 1; i <= pdfFile.numPages; i++) {
    // Parse each page into text content
    const page = await pdfFile.getPage(i);
    const textContent = await page.getTextContent();

    // Wait for font data to be loaded
    await page.getOperatorList();
    const commonObjs = page.commonObjs;

    // Convert Pdfjs TextItem type to new TextItem type
    const pageTextItems = textContent.items.map((item) => {
      const {
        str: text,
        dir, // Remove text direction
        transform,
        fontName: pdfFontName,
        ...otherProps
      } = item as PdfjsTextItem;

      // Extract x, y position of text item from transform.
      // As a side note, origin (0, 0) is bottom left.
      // Reference: https://github.com/mozilla/pdf.js/issues/5643#issuecomment-496648719
      const x = transform[4];
      const y = transform[5];

      // Use commonObjs to convert font name to original name (e.g. "GVDLYI+Arial-BoldMT")
      // since non system font name by default is a loaded name, e.g. "g_d8_f1"
      // Reference: https://github.com/mozilla/pdf.js/pull/15659
      const fontObj = commonObjs.get(pdfFontName);
      const fontName = fontObj.name;

      // pdfjs reads a "-" as "-" followed by a soft hyphen (U+00AD) in the resume
      // example. This reverts it. The soft hyphen is invisible, so it is written
      // as an explicit \u00AD escape rather than as a literal character.
      const newText = text.replace(/-\u00AD/g, "-");

      const newItem = {
        ...otherProps,
        fontName,
        text: newText,
        x,
        y,
      };
      return newItem;
    });

    // Some pdf's text items are not in order. This is most likely a result of creating it
    // from design softwares, e.g. canvas. The commented out method can sort pageTextItems
    // by y position to put them back in order. But it is not used since it might be more
    // helpful to let users know that the pdf is not in order.
    // pageTextItems.sort((a, b) => Math.round(b.y) - Math.round(a.y));

    // Add text items of each page to total
    textItems.push(...pageTextItems);
  }

  // Filter out empty space textItem noise
  const isEmptySpace = (textItem: TextItem) =>
    !textItem.hasEOL && textItem.text.trim() === "";
  textItems = textItems.filter((textItem) => !isEmptySpace(textItem));

  return textItems;
};

View File

@@ -0,0 +1,37 @@
import type { ResumeKey } from "lib/redux/types";
/**
 * A piece of text read from the pdf, together with its position, size and font.
 * readPdf takes x and y from the pdfjs transform, where origin (0, 0) is bottom left.
 */
export interface TextItem {
text: string;
x: number;
y: number;
width: number;
height: number;
fontName: string;
// True when the item ends a line; groupTextItemsIntoLines uses it to close the current line
hasEOL: boolean;
}
export type TextItems = TextItem[];
// The text items that belong to the same line of the pdf
export type Line = TextItem[];
export type Lines = Line[];
// Maps a section name to the lines of that section. Keys may be resume keys
// (e.g. "profile") or any other section title found in the pdf.
export type ResumeSectionToLines = { [sectionName in ResumeKey]?: Lines } & {
[otherSectionName: string]: Lines;
};
// Each subsection is a group of lines within a section
export type Subsections = Lines[];
type FeatureScore = -4 | -3 | -2 | -1 | 0 | 1 | 2 | 3 | 4;
type ReturnMatchingTextOnly = boolean;
/**
 * A scoring rule for a text item: a matcher function followed by the score to
 * apply when it matches. The matcher either returns a boolean, or returns a
 * regex match result, in which case a third flag tells the scorer whether to
 * use only the matching text.
 */
export type FeatureSet =
| [(item: TextItem) => boolean, FeatureScore]
| [
(item: TextItem) => RegExpMatchArray | null,
FeatureScore,
ReturnMatchingTextOnly
];
// A text with its accumulated feature score; `match` is set by the scorer for
// feature sets that return matching text
export interface TextScore {
text: string;
score: number;
match: boolean;
}
export type TextScores = TextScore[];

View File

@@ -0,0 +1,59 @@
import { useEffect } from "react";
import {
useDispatch,
useSelector,
type TypedUseSelectorHook,
} from "react-redux";
import { store, type RootState, type AppDispatch } from "lib/redux/store";
import {
loadStateFromLocalStorage,
saveStateToLocalStorage,
} from "lib/redux/local-storage";
import { initialResumeState, setResume } from "lib/redux/resumeSlice";
import {
initialSettings,
setSettings,
type Settings,
} from "lib/redux/settingsSlice";
import { deepMerge } from "lib/deep-merge";
import type { Resume } from "lib/redux/types";
// Typed versions of the react-redux hooks, bound to this app's AppDispatch and RootState types
export const useAppDispatch: () => AppDispatch = useDispatch;
export const useAppSelector: TypedUseSelectorHook<RootState> = useSelector;
/**
 * Hook that subscribes to the store on mount and saves the whole store state
 * to local storage on every store change. The subscription is removed on unmount.
 */
export const useSaveStateToLocalStorageOnChange = () => {
  useEffect(
    () =>
      // store.subscribe returns the unsubscribe function, which doubles as the effect cleanup
      store.subscribe(() => {
        saveStateToLocalStorage(store.getState());
      }),
    []
  );
};
/**
 * Hook that, on mount, loads the state saved in local storage (if any) and
 * dispatches it into the store for the resume and settings slices.
 */
export const useSetInitialStore = () => {
  const dispatch = useAppDispatch();
  useEffect(() => {
    const storedState = loadStateFromLocalStorage();
    if (!storedState) return;

    // The stored state is merged on top of the initial state to ensure
    // backward compatibility, since new fields might be added to the
    // initial state over time.
    const { resume: storedResume, settings: storedSettings } = storedState;
    if (storedResume) {
      const resume = deepMerge(initialResumeState, storedResume) as Resume;
      dispatch(setResume(resume));
    }
    if (storedSettings) {
      const settings = deepMerge(initialSettings, storedSettings) as Settings;
      dispatch(setSettings(settings));
    }
  }, []);
};

View File

@@ -0,0 +1,26 @@
import type { RootState } from "lib/redux/store";
// Reference: https://dev.to/igorovic/simplest-way-to-persist-redux-state-to-localstorage-e67
const LOCAL_STORAGE_KEY = "open-resume-state";
/**
 * Reads and parses the state saved in local storage.
 * Returns undefined when nothing is saved, or when reading or parsing fails.
 */
export const loadStateFromLocalStorage = () => {
  try {
    const serializedState = localStorage.getItem(LOCAL_STORAGE_KEY);
    return serializedState ? JSON.parse(serializedState) : undefined;
  } catch {
    return undefined;
  }
};
/**
 * Serializes the store state to JSON and writes it to local storage.
 * Saving is best effort: any error while serializing or writing is ignored.
 */
export const saveStateToLocalStorage = (state: RootState) => {
  try {
    localStorage.setItem(LOCAL_STORAGE_KEY, JSON.stringify(state));
  } catch {
    // Ignore
  }
};
/** Returns true when a (truthy) saved state can be loaded from local storage. */
export const getHasUsedAppBefore = () => !!loadStateFromLocalStorage();

View File

@@ -0,0 +1,225 @@
import { createSlice, type PayloadAction } from "@reduxjs/toolkit";
import type { RootState } from "lib/redux/store";
import type {
FeaturedSkill,
Resume,
ResumeEducation,
ResumeProfile,
ResumeProject,
ResumeSkills,
ResumeWorkExperience,
} from "lib/redux/types";
import type { ShowForm } from "lib/redux/settingsSlice";
// Blank profile: every field starts as an empty string
export const initialProfile: ResumeProfile = {
name: "",
summary: "",
email: "",
phone: "",
location: "",
url: "",
};
// Blank work experience entry; also the template cloned by addSectionInForm
export const initialWorkExperience: ResumeWorkExperience = {
company: "",
jobTitle: "",
date: "",
descriptions: [],
};
// Blank education entry; also the template cloned by addSectionInForm
export const initialEducation: ResumeEducation = {
school: "",
degree: "",
gpa: "",
date: "",
descriptions: [],
};
// Blank project entry; also the template cloned by addSectionInForm
export const initialProject: ResumeProject = {
project: "",
date: "",
descriptions: [],
};
// Blank featured skill with the default rating
export const initialFeaturedSkill: FeaturedSkill = { skill: "", rating: 4 };
// Six blank featured skills. Each slot gets its own object: Array.fill would
// put the very same object reference in all six slots, so editing one entry
// outside of an Immer draft would change all of them.
export const initialFeaturedSkills: FeaturedSkill[] = Array.from(
  { length: 6 },
  () => ({ ...initialFeaturedSkill })
);
// Blank skills section: the default featured skills and no descriptions
export const initialSkills: ResumeSkills = {
featuredSkills: initialFeaturedSkills,
descriptions: [],
};
// Blank custom section
export const initialCustom = {
descriptions: [],
};
// Initial state of the resume slice: a blank profile, one blank entry for each
// of work experiences, educations and projects, plus blank skills and custom sections
export const initialResumeState: Resume = {
profile: initialProfile,
workExperiences: [initialWorkExperience],
educations: [initialEducation],
projects: [initialProject],
skills: initialSkills,
custom: initialCustom,
};
// Keep the field & value type in sync with CreateHandleChangeArgsWithDescriptions (components\ResumeForm\types.ts)
// Payload for changing one field of the entry at position `idx`: "descriptions"
// takes a string array, every other field takes a string.
export type CreateChangeActionWithDescriptions<T> = {
idx: number;
} & (
| {
field: Exclude<keyof T, "descriptions">;
value: string;
}
| { field: "descriptions"; value: string[] }
);
/**
 * Redux Toolkit slice holding the resume. Reducers receive a draft of the
 * state and either mutate it or return a replacement state.
 */
export const resumeSlice = createSlice({
name: "resume",
initialState: initialResumeState,
reducers: {
// Set a single profile field
changeProfile: (
draft,
action: PayloadAction<{ field: keyof ResumeProfile; value: string }>
) => {
const { field, value } = action.payload;
draft.profile[field] = value;
},
// Set a single field of the work experience at `idx`
changeWorkExperiences: (
draft,
action: PayloadAction<
CreateChangeActionWithDescriptions<ResumeWorkExperience>
>
) => {
const { idx, field, value } = action.payload;
const workExperience = draft.workExperiences[idx];
workExperience[field] = value as any;
},
// Set a single field of the education at `idx`
changeEducations: (
draft,
action: PayloadAction<CreateChangeActionWithDescriptions<ResumeEducation>>
) => {
const { idx, field, value } = action.payload;
const education = draft.educations[idx];
education[field] = value as any;
},
// Set a single field of the project at `idx`
changeProjects: (
draft,
action: PayloadAction<CreateChangeActionWithDescriptions<ResumeProject>>
) => {
const { idx, field, value } = action.payload;
const project = draft.projects[idx];
project[field] = value as any;
},
// Either replace the skills descriptions, or set the name and rating of the
// featured skill at `idx`
changeSkills: (
draft,
action: PayloadAction<
| { field: "descriptions"; value: string[] }
| {
field: "featuredSkills";
idx: number;
skill: string;
rating: number;
}
>
) => {
const { field } = action.payload;
if (field === "descriptions") {
const { value } = action.payload;
draft.skills.descriptions = value;
} else {
const { idx, skill, rating } = action.payload;
const featuredSkill = draft.skills.featuredSkills[idx];
featuredSkill.skill = skill;
featuredSkill.rating = rating;
}
},
// Replace the descriptions of the custom section
changeCustom: (
draft,
action: PayloadAction<{ field: "descriptions"; value: string[] }>
) => {
const { value } = action.payload;
draft.custom.descriptions = value;
},
// Append a blank entry to work experiences, educations or projects.
// The blank template is cloned so the new entry is an independent copy.
// Other forms ("skills", "custom") have no case here and are left unchanged.
addSectionInForm: (draft, action: PayloadAction<{ form: ShowForm }>) => {
const { form } = action.payload;
switch (form) {
case "workExperiences": {
draft.workExperiences.push(structuredClone(initialWorkExperience));
return draft;
}
case "educations": {
draft.educations.push(structuredClone(initialEducation));
return draft;
}
case "projects": {
draft.projects.push(structuredClone(initialProject));
return draft;
}
}
},
// Swap the entry at `idx` with its neighbor above ("up") or below ("down").
// Does nothing for "skills" and "custom", or when the entry is already at
// the corresponding end of the list.
moveSectionInForm: (
draft,
action: PayloadAction<{
form: ShowForm;
idx: number;
direction: "up" | "down";
}>
) => {
const { form, idx, direction } = action.payload;
if (form !== "skills" && form !== "custom") {
if (
(idx === 0 && direction === "up") ||
(idx === draft[form].length - 1 && direction === "down")
) {
return draft;
}
const section = draft[form][idx];
if (direction === "up") {
draft[form][idx] = draft[form][idx - 1];
draft[form][idx - 1] = section;
} else {
draft[form][idx] = draft[form][idx + 1];
draft[form][idx + 1] = section;
}
}
},
// Remove the entry at `idx`; does nothing for "skills" and "custom"
deleteSectionInFormByIdx: (
draft,
action: PayloadAction<{ form: ShowForm; idx: number }>
) => {
const { form, idx } = action.payload;
if (form !== "skills" && form !== "custom") {
draft[form].splice(idx, 1);
}
},
// Replace the whole resume state with the payload
setResume: (draft, action: PayloadAction<Resume>) => {
return action.payload;
},
},
});
// Action creators generated by the slice, one per reducer above
export const {
changeProfile,
changeWorkExperiences,
changeEducations,
changeProjects,
changeSkills,
changeCustom,
addSectionInForm,
moveSectionInForm,
deleteSectionInFormByIdx,
setResume,
} = resumeSlice.actions;
// Selectors for the resume slice and each of its sections
export const selectResume = ({ resume }: RootState) => resume;
export const selectProfile = ({ resume }: RootState) => resume.profile;
export const selectWorkExperiences = ({ resume }: RootState) =>
  resume.workExperiences;
export const selectEducations = ({ resume }: RootState) => resume.educations;
export const selectProjects = ({ resume }: RootState) => resume.projects;
export const selectSkills = ({ resume }: RootState) => resume.skills;
export const selectCustom = ({ resume }: RootState) => resume.custom;
export default resumeSlice.reducer;

View File

@@ -0,0 +1,161 @@
import { createSlice, type PayloadAction } from "@reduxjs/toolkit";
import type { RootState } from "lib/redux/store";
/**
 * Shape of the settings slice: general string settings plus per-form options.
 */
export interface Settings {
themeColor: string;
fontFamily: string;
fontSize: string;
documentSize: string;
// Whether each form is shown
formToShow: {
workExperiences: boolean;
educations: boolean;
projects: boolean;
skills: boolean;
custom: boolean;
};
// Heading text of each form
formToHeading: {
workExperiences: string;
educations: string;
projects: string;
skills: string;
custom: string;
};
// Order of the forms
formsOrder: ShowForm[];
// Whether bullet points are shown, for the forms that have this option
// (there is no entry for workExperiences)
showBulletPoints: {
educations: boolean;
projects: boolean;
skills: boolean;
custom: boolean;
};
}
// Name of a form that can be shown or hidden
export type ShowForm = keyof Settings["formToShow"];
// Name of a form that has a bullet point option
export type FormWithBulletPoints = keyof Settings["showBulletPoints"];
// Name of a top-level setting whose value is a plain string
export type GeneralSetting = Exclude<
keyof Settings,
"formToShow" | "formToHeading" | "formsOrder" | "showBulletPoints"
>;
export const DEFAULT_THEME_COLOR = "#38bdf8"; // sky-400
export const DEFAULT_FONT_FAMILY = "Roboto";
export const DEFAULT_FONT_SIZE = "11"; // text-base https://tailwindcss.com/docs/font-size
// NOTE(review): #171717 is neutral-900 in the default Tailwind palette (neutral-800 is #262626) — confirm the intended shade
export const DEFAULT_FONT_COLOR = "#171717"; // text-neutral-900
// Initial state of the settings slice: default theme and font, Letter document
// size, every form shown except the custom one, and bullet points enabled
export const initialSettings: Settings = {
themeColor: DEFAULT_THEME_COLOR,
fontFamily: DEFAULT_FONT_FAMILY,
fontSize: DEFAULT_FONT_SIZE,
documentSize: "Letter",
formToShow: {
workExperiences: true,
educations: true,
projects: true,
skills: true,
custom: false,
},
formToHeading: {
workExperiences: "WORK EXPERIENCE",
educations: "EDUCATION",
projects: "PROJECT",
skills: "SKILLS",
custom: "CUSTOM SECTION",
},
formsOrder: ["workExperiences", "educations", "projects", "skills", "custom"],
showBulletPoints: {
educations: true,
projects: true,
skills: true,
custom: true,
},
};
/**
 * Redux Toolkit slice holding the resume settings. Reducers receive a draft of
 * the state and either mutate it or return a replacement state.
 */
export const settingsSlice = createSlice({
  name: "settings",
  initialState: initialSettings,
  reducers: {
    // Set one of the general (string valued) settings
    changeSettings: (
      draft,
      action: PayloadAction<{ field: GeneralSetting; value: string }>
    ) => {
      const { field, value } = action.payload;
      draft[field] = value;
    },
    // Show or hide a form
    changeShowForm: (
      draft,
      action: PayloadAction<{ field: ShowForm; value: boolean }>
    ) => {
      const { field, value } = action.payload;
      draft.formToShow[field] = value;
    },
    // Set the heading text of a form
    changeFormHeading: (
      draft,
      action: PayloadAction<{ field: ShowForm; value: string }>
    ) => {
      const { field, value } = action.payload;
      draft.formToHeading[field] = value;
    },
    // Move a form one position up or down in the forms order. Does nothing
    // when the form is already at the corresponding end of the list.
    changeFormOrder: (
      draft,
      action: PayloadAction<{ form: ShowForm; type: "up" | "down" }>
    ) => {
      const { form, type } = action.payload;
      const lastIdx = draft.formsOrder.length - 1;
      const pos = draft.formsOrder.indexOf(form);
      // A form that is not in the list has position -1. Without this guard,
      // moving it "down" would target index 0, pass the range check below, and
      // the swap would write to index -1 and put undefined at index 0.
      if (pos === -1) {
        return;
      }
      const newPos = type === "up" ? pos - 1 : pos + 1;
      if (newPos < 0 || newPos > lastIdx) {
        return;
      }
      const movedForm = draft.formsOrder[pos];
      draft.formsOrder[pos] = draft.formsOrder[newPos];
      draft.formsOrder[newPos] = movedForm;
    },
    // Turn bullet points on or off for a form
    changeShowBulletPoints: (
      draft,
      action: PayloadAction<{
        field: FormWithBulletPoints;
        value: boolean;
      }>
    ) => {
      const { field, value } = action.payload;
      draft["showBulletPoints"][field] = value;
    },
    // Replace the whole settings state with the payload
    setSettings: (draft, action: PayloadAction<Settings>) => {
      return action.payload;
    },
  },
});
// Action creators generated by the slice, one per reducer above
export const {
  changeSettings,
  changeShowForm,
  changeFormHeading,
  changeFormOrder,
  changeShowBulletPoints,
  setSettings,
} = settingsSlice.actions;
// Selectors for the settings slice. The ones that take a form name return a
// selector bound to that form.
export const selectSettings = ({ settings }: RootState) => settings;
export const selectThemeColor = ({ settings }: RootState) =>
  settings.themeColor;
export const selectFormToShow = ({ settings }: RootState) =>
  settings.formToShow;
export const selectShowByForm =
  (form: ShowForm) =>
  ({ settings }: RootState) =>
    settings.formToShow[form];
export const selectFormToHeading = ({ settings }: RootState) =>
  settings.formToHeading;
export const selectHeadingByForm =
  (form: ShowForm) =>
  ({ settings }: RootState) =>
    settings.formToHeading[form];
export const selectFormsOrder = ({ settings }: RootState) =>
  settings.formsOrder;
export const selectIsFirstForm =
  (form: ShowForm) =>
  ({ settings }: RootState) =>
    settings.formsOrder[0] === form;
export const selectIsLastForm =
  (form: ShowForm) =>
  ({ settings }: RootState) => {
    const { formsOrder } = settings;
    return formsOrder[formsOrder.length - 1] === form;
  };
export const selectShowBulletPoints =
  (form: FormWithBulletPoints) =>
  ({ settings }: RootState) =>
    settings.showBulletPoints[form];
export default settingsSlice.reducer;

View File

@@ -0,0 +1,13 @@
import { configureStore } from "@reduxjs/toolkit";
import resumeReducer from "lib/redux/resumeSlice";
import settingsReducer from "lib/redux/settingsSlice";
// The app's Redux store, combining the resume and settings reducers
export const store = configureStore({
reducer: {
resume: resumeReducer,
settings: settingsReducer,
},
});
// Type of the whole store state, derived from the store itself
export type RootState = ReturnType<typeof store.getState>;
// Type of the store's dispatch function
export type AppDispatch = typeof store.dispatch;

View File

@@ -0,0 +1,54 @@
// Contact details and summary shown in the profile section
export interface ResumeProfile {
name: string;
email: string;
phone: string;
url: string;
summary: string;
location: string;
}
// One work experience entry
export interface ResumeWorkExperience {
company: string;
jobTitle: string;
date: string;
descriptions: string[];
}
// One education entry
export interface ResumeEducation {
school: string;
degree: string;
date: string;
gpa: string;
descriptions: string[];
}
// One project entry
export interface ResumeProject {
project: string;
date: string;
descriptions: string[];
}
// A skill name with a numeric rating
export interface FeaturedSkill {
skill: string;
rating: number;
}
// Skills section: featured skills plus free-form descriptions
export interface ResumeSkills {
featuredSkills: FeaturedSkill[];
descriptions: string[];
}
// Custom section made of free-form descriptions
export interface ResumeCustom {
descriptions: string[];
}
// The whole resume: a profile, lists of entries, and the skills and custom sections
export interface Resume {
profile: ResumeProfile;
workExperiences: ResumeWorkExperience[];
educations: ResumeEducation[];
projects: ResumeProject[];
skills: ResumeSkills;
custom: ResumeCustom;
}
// Name of a top-level resume section
export type ResumeKey = keyof Resume;