84 lines
3.6 KiB
TypeScript
84 lines
3.6 KiB
TypeScript
import type { ResumeWorkExperience } from "lib/redux/types";
|
|
import type {
|
|
TextItem,
|
|
FeatureSet,
|
|
ResumeSectionToLines,
|
|
} from "lib/parse-resume-from-pdf/types";
|
|
import { getSectionLinesByKeywords } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/get-section-lines";
|
|
import {
|
|
DATE_FEATURE_SETS,
|
|
hasNumber,
|
|
getHasText,
|
|
isBold,
|
|
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/common-features";
|
|
import { divideSectionIntoSubsections } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/subsections";
|
|
import { getTextWithHighestFeatureScore } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/feature-scoring-system";
|
|
import {
|
|
getBulletPointsFromLines,
|
|
getDescriptionsLineIdx,
|
|
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/bullet-points";
|
|
|
|
// prettier-ignore
|
|
// Lowercase keywords handed to getSectionLinesByKeywords to pick out the lines
// of the resume's work-experience section. How the keywords are matched against
// section names is decided by that helper (not visible in this file).
const WORK_EXPERIENCE_KEYWORDS_LOWERCASE = ['work', 'experience', 'employment', 'history', 'job'];
|
|
// prettier-ignore
|
|
// Words that signal a job title. hasJobTitle compares each whitespace-separated
// word of a text item against these entries with strict equality, so matching is
// case-sensitive and whole-word only (e.g. "engineer" or "Engineer," do not match).
const JOB_TITLES = ['Accountant', 'Administrator', 'Advisor', 'Agent', 'Analyst', 'Apprentice', 'Architect', 'Assistant', 'Associate', 'Auditor', 'Bartender', 'Biologist', 'Bookkeeper', 'Buyer', 'Carpenter', 'Cashier', 'CEO', 'Clerk', 'Co-op', 'Co-Founder', 'Consultant', 'Coordinator', 'CTO', 'Developer', 'Designer', 'Director', 'Driver', 'Editor', 'Electrician', 'Engineer', 'Extern', 'Founder', 'Freelancer', 'Head', 'Intern', 'Janitor', 'Journalist', 'Laborer', 'Lawyer', 'Lead', 'Manager', 'Mechanic', 'Member', 'Nurse', 'Officer', 'Operator', 'Operation', 'Photographer', 'President', 'Producer', 'Recruiter', 'Representative', 'Researcher', 'Sales', 'Server', 'Scientist', 'Specialist', 'Supervisor', 'Teacher', 'Technician', 'Trader', 'Trainee', 'Treasurer', 'Tutor', 'Vice', 'VP', 'Volunteer', 'Webmaster', 'Worker'];
|
|
|
|
/**
 * Reports whether at least one whitespace-separated word of the item's text
 * is exactly equal to an entry of JOB_TITLES.
 *
 * The comparison is case-sensitive and whole-word: a word carrying attached
 * punctuation (e.g. "Engineer,") is not considered a match.
 */
const hasJobTitle = (item: TextItem) => {
  const words = item.text.split(/\s/);
  return words.some((word) => JOB_TITLES.includes(word));
};
|
|
/**
 * Returns true when the item's text contains more than five words.
 *
 * A word is a non-empty run of non-whitespace characters. Splitting on a single
 * whitespace character yields empty strings for repeated, leading, or trailing
 * whitespace; those are filtered out so they do not inflate the count.
 */
const hasMoreThan5Words = (item: TextItem) =>
  item.text.split(/\s+/).filter(Boolean).length > 5;
|
|
/**
 * Scoring features used to pick the job-title text item of a subsection.
 * Each entry pairs a predicate with the weight attached to it; the weights are
 * consumed by getTextWithHighestFeatureScore (scoring rules live in that helper).
 */
const JOB_TITLE_FEATURE_SET: FeatureSet[] = [
  [hasJobTitle, 4], // contains a known job-title word
  [hasNumber, -4], // numbers weigh against a job title
  [hasMoreThan5Words, -2], // long text weighs against a job title
];
|
|
|
|
/**
 * Builds the list of work-experience entries from the parsed resume sections.
 *
 * The lines selected by WORK_EXPERIENCE_KEYWORDS_LOWERCASE are divided into
 * subsections; each subsection yields one entry made of a date, a job title,
 * a company, and the bullet-point descriptions.
 *
 * @param sections - Resume sections mapped to their lines.
 * @returns `workExperiences` (one entry per subsection) and
 *   `workExperiencesScores` (the feature scores behind each entry's company,
 *   job title, and date, in the same order).
 */
export const extractWorkExperience = (sections: ResumeSectionToLines) => {
  const workExperiences: ResumeWorkExperience[] = [];
  const workExperiencesScores = [];

  const workLines = getSectionLinesByKeywords(
    sections,
    WORK_EXPERIENCE_KEYWORDS_LOWERCASE
  );

  for (const subsection of divideSectionIntoSubsections(workLines)) {
    // Index of the first description line; when the helper reports none,
    // fall back to treating the first two lines as the info header.
    const splitIdx = getDescriptionsLineIdx(subsection) ?? 2;
    const infoLines = subsection.slice(0, splitIdx);
    const descriptionLines = subsection.slice(splitIdx);

    // Date, job title, and company are all searched among the header's text items.
    const infoTextItems = infoLines.flat();

    const [date, dateScores] = getTextWithHighestFeatureScore(
      infoTextItems,
      DATE_FEATURE_SETS
    );
    const [jobTitle, jobTitleScores] = getTextWithHighestFeatureScore(
      infoTextItems,
      JOB_TITLE_FEATURE_SET
    );

    // Built per subsection because it penalizes the date and job title that
    // were just picked, steering the company choice toward a different item.
    const companyFeatureSet: FeatureSet[] = [
      [isBold, 2],
      [getHasText(date), -4],
      [getHasText(jobTitle), -4],
    ];
    // NOTE(review): only the company lookup passes a third argument (`false`);
    // its meaning is defined by getTextWithHighestFeatureScore — confirm there.
    const [company, companyScores] = getTextWithHighestFeatureScore(
      infoTextItems,
      companyFeatureSet,
      false
    );

    const descriptions = getBulletPointsFromLines(descriptionLines);

    workExperiences.push({ company, jobTitle, date, descriptions });
    workExperiencesScores.push({ companyScores, jobTitleScores, dateScores });
  }

  return { workExperiences, workExperiencesScores };
};
|