import type { ResumeWorkExperience } from "lib/redux/types";
import type {
  TextItem,
  FeatureSet,
  ResumeSectionToLines,
} from "lib/parse-resume-from-pdf/types";
import { getSectionLinesByKeywords } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/get-section-lines";
import {
  DATE_FEATURE_SETS,
  hasNumber,
  getHasText,
  isBold,
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/common-features";
import { divideSectionIntoSubsections } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/subsections";
import { getTextWithHighestFeatureScore } from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/feature-scoring-system";
import {
  getBulletPointsFromLines,
  getDescriptionsLineIdx,
} from "lib/parse-resume-from-pdf/extract-resume-from-sections/lib/bullet-points";

/** Keywords passed to `getSectionLinesByKeywords` to locate the work experience section. */
// prettier-ignore
const WORK_EXPERIENCE_KEYWORDS_LOWERCASE = ['work', 'experience', 'employment', 'history', 'job'];

/** Words that mark a text item as a likely job title (compared case-sensitively, whole word). */
// prettier-ignore
const JOB_TITLES = ['Accountant', 'Administrator', 'Advisor', 'Agent', 'Analyst', 'Apprentice', 'Architect', 'Assistant', 'Associate', 'Auditor', 'Bartender', 'Biologist', 'Bookkeeper', 'Buyer', 'Carpenter', 'Cashier', 'CEO', 'Clerk', 'Co-op', 'Co-Founder', 'Consultant', 'Coordinator', 'CTO', 'Developer', 'Designer', 'Director', 'Driver', 'Editor', 'Electrician', 'Engineer', 'Extern', 'Founder', 'Freelancer', 'Head', 'Intern', 'Janitor', 'Journalist', 'Laborer', 'Lawyer', 'Lead', 'Manager', 'Mechanic', 'Member', 'Nurse', 'Officer', 'Operator', 'Operation', 'Photographer', 'President', 'Producer', 'Recruiter', 'Representative', 'Researcher', 'Sales', 'Server', 'Scientist', 'Specialist', 'Supervisor', 'Teacher', 'Technician', 'Trader', 'Trainee', 'Treasurer', 'Tutor', 'Vice', 'VP', 'Volunteer', 'Webmaster', 'Worker'];

/**
 * Returns true when at least one whitespace-delimited word of the item's text
 * is exactly equal to an entry of `JOB_TITLES`.
 *
 * The text is split once and each word is looked up in the list, instead of
 * re-splitting the text for every job title.
 */
const hasJobTitle = (item: TextItem) =>
  item.text.split(/\s/).some((word) => JOB_TITLES.includes(word));

/**
 * Returns true when splitting the item's text on single whitespace characters
 * yields more than 5 pieces. Consecutive whitespace characters produce empty
 * pieces, which are counted as well.
 */
const hasMoreThan5Words = (item: TextItem) =>
  item.text.split(/\s/).length > 5;

/**
 * [predicate, score] pairs used to pick the job title text item: a known job
 * title word scores positively, while numbers and long text score negatively.
 * NOTE(review): how scores are combined is decided by
 * `getTextWithHighestFeatureScore` — confirm there.
 */
const JOB_TITLE_FEATURE_SET: FeatureSet[] = [
  [hasJobTitle, 4],
  [hasNumber, -4],
  [hasMoreThan5Words, -2],
];

/**
 * Extracts work experience entries from the parsed resume sections.
 *
 * The lines matching `WORK_EXPERIENCE_KEYWORDS_LOWERCASE` are divided into
 * subsections; for each subsection a date, job title, company and list of
 * descriptions are extracted.
 *
 * @param sections - Resume sections mapped to their lines.
 * @returns `workExperiences`, one entry per subsection, and
 * `workExperiencesScores`, the scoring details returned by
 * `getTextWithHighestFeatureScore` for each entry, in the same order.
 */
export const extractWorkExperience = (sections: ResumeSectionToLines) => {
  const workExperiences: ResumeWorkExperience[] = [];
  const workExperiencesScores = [];
  const lines = getSectionLinesByKeywords(
    sections,
    WORK_EXPERIENCE_KEYWORDS_LOWERCASE
  );
  const subsections = divideSectionIntoSubsections(lines);

  for (const subsectionLines of subsections) {
    // Lines before this index hold the entry's info (date, title, company);
    // fall back to the first 2 lines when the helper returns null/undefined.
    const descriptionsLineIdx = getDescriptionsLineIdx(subsectionLines) ?? 2;

    const subsectionInfoTextItems = subsectionLines
      .slice(0, descriptionsLineIdx)
      .flat();
    const [date, dateScores] = getTextWithHighestFeatureScore(
      subsectionInfoTextItems,
      DATE_FEATURE_SETS
    );
    const [jobTitle, jobTitleScores] = getTextWithHighestFeatureScore(
      subsectionInfoTextItems,
      JOB_TITLE_FEATURE_SET
    );

    // Built per subsection because it depends on the date and job title just
    // extracted: text matching either of them is scored down as a company.
    const companyFeatureSet: FeatureSet[] = [
      [isBold, 2],
      [getHasText(date), -4],
      [getHasText(jobTitle), -4],
    ];
    // NOTE(review): the meaning of the trailing `false` argument is defined by
    // `getTextWithHighestFeatureScore` — confirm against that helper.
    const [company, companyScores] = getTextWithHighestFeatureScore(
      subsectionInfoTextItems,
      companyFeatureSet,
      false
    );

    // Everything from the descriptions index onward is treated as description lines.
    const subsectionDescriptionsLines =
      subsectionLines.slice(descriptionsLineIdx);
    const descriptions = getBulletPointsFromLines(subsectionDescriptionsLines);

    workExperiences.push({ company, jobTitle, date, descriptions });
    workExperiencesScores.push({
      companyScores,
      jobTitleScores,
      dateScores,
    });
  }

  return { workExperiences, workExperiencesScores };
};