Update topic assignment

pull/5821/head
Kamran Ahmed 10 months ago
parent 458ae33eec
commit dfef66f4b5
  1. 298
      scripts/assign-label-types.cjs

@ -1,184 +1,188 @@
const fs = require('node:fs'); const fs = require('node:fs');
const path = require('node:path'); const path = require('node:path');
const roadmapId = 'frontend'; const allRoadmapDirs = fs.readdirSync(
path.join(__dirname, '../src/data/roadmaps'),
const roadmapDir = path.join(
__dirname,
`../src/data/roadmaps/${roadmapId}/content`,
); );
function getHostNameWithoutTld(hostname) { allRoadmapDirs.forEach((roadmapId) => {
const parts = hostname.split('.'); const roadmapDir = path.join(
return parts.slice(0, parts.length - 1).join('.'); __dirname,
} `../src/data/roadmaps/${roadmapId}/content`,
);
function isOfficialWebsite(hostname, fileName, roadmapId) { function getHostNameWithoutTld(hostname) {
fileName = fileName.replace('/index.md', '').replace('.md', ''); const parts = hostname.split('.');
return parts.slice(0, parts.length - 1).join('.');
}
const parts = fileName.split('/'); function isOfficialWebsite(hostname, fileName, roadmapId) {
const lastPart = parts[parts.length - 1]; fileName = fileName.replace('/index.md', '').replace('.md', '');
const normalizedFilename = lastPart.replace(/\d+/g, '').replace(/-/g, ''); const parts = fileName.split('/');
const normalizedHostname = getHostNameWithoutTld(hostname); const lastPart = parts[parts.length - 1];
if (normalizedFilename === normalizedHostname) { const normalizedFilename = lastPart.replace(/\d+/g, '').replace(/-/g, '');
return true; const normalizedHostname = getHostNameWithoutTld(hostname);
}
if (normalizedFilename.includes(normalizedHostname)) {
return true;
}
return !!roadmapId.includes(normalizedHostname); if (normalizedFilename === normalizedHostname) {
} return true;
}
// websites are educational websites that are of following types:
// - @official@
// - @article@
// - @course@
// - @opensource@
// - @podcast@
// - @video@
// - @website@
// content is only educational websites
function getTypeFromHostname(hostname, fileName, roadmapId) {
hostname = hostname.replace('www.', '');
const videoHostnames = ['youtube.com', 'vimeo.com', 'youtu.be'];
const courseHostnames = ['coursera.org', 'udemy.com', 'edx.org'];
const podcastHostnames = ['spotify.com', 'apple.com'];
const opensourceHostnames = ['github.com', 'gitlab.com'];
const articleHostnames = [
'neilpatel.com',
'learningseo.io',
'htmlreference.io',
'docs.gitlab.com',
'docs.github.com',
'skills.github.com',
'cloudflare.com',
'w3schools.com',
'medium.com',
'dev.to',
'web.dev',
'css-tricks.com',
'developer.mozilla.org',
'smashingmagazine.com',
'freecodecamp.org',
'cs.fyi',
'thenewstack.io',
'html5rocks.com',
'html.com',
'javascript.info',
'css-tricks.com',
'developer.apple.com',
];
if (articleHostnames.includes(hostname)) {
return 'article';
}
if (videoHostnames.includes(hostname)) { if (normalizedFilename.includes(normalizedHostname)) {
return 'video'; return true;
} }
if (courseHostnames.includes(hostname)) { return !!roadmapId.includes(normalizedHostname);
return 'course';
} }
if (podcastHostnames.includes(hostname)) { // websites are educational websites that are of following types:
return 'podcast'; // - @official@
} // - @article@
// - @course@
// - @opensource@
// - @podcast@
// - @video@
// - @website@
// content is only educational websites
function getTypeFromHostname(hostname, fileName, roadmapId) {
hostname = hostname.replace('www.', '');
const videoHostnames = ['youtube.com', 'vimeo.com', 'youtu.be'];
const courseHostnames = ['coursera.org', 'udemy.com', 'edx.org'];
const podcastHostnames = ['spotify.com', 'apple.com'];
const opensourceHostnames = ['github.com', 'gitlab.com'];
const articleHostnames = [
'neilpatel.com',
'learningseo.io',
'htmlreference.io',
'docs.gitlab.com',
'docs.github.com',
'skills.github.com',
'cloudflare.com',
'w3schools.com',
'medium.com',
'dev.to',
'web.dev',
'css-tricks.com',
'developer.mozilla.org',
'smashingmagazine.com',
'freecodecamp.org',
'cs.fyi',
'thenewstack.io',
'html5rocks.com',
'html.com',
'javascript.info',
'css-tricks.com',
'developer.apple.com',
];
if (articleHostnames.includes(hostname)) {
return 'article';
}
if (opensourceHostnames.includes(hostname)) { if (videoHostnames.includes(hostname)) {
return 'opensource'; return 'video';
} }
if (hostname === 'roadmap.sh') { if (courseHostnames.includes(hostname)) {
return 'roadmap.sh'; return 'course';
} }
if (isOfficialWebsite(hostname, fileName, roadmapId)) { if (podcastHostnames.includes(hostname)) {
return 'official'; return 'podcast';
} }
return 'article'; if (opensourceHostnames.includes(hostname)) {
} return 'opensource';
}
function readNestedMarkdownFiles(dir, files = []) { if (hostname === 'roadmap.sh') {
const dirEnts = fs.readdirSync(dir, { withFileTypes: true }); return 'roadmap.sh';
}
for (const dirent of dirEnts) { if (isOfficialWebsite(hostname, fileName, roadmapId)) {
const fullPath = path.join(dir, dirent.name); return 'official';
if (dirent.isDirectory()) {
readNestedMarkdownFiles(fullPath, files);
} else {
if (path.extname(fullPath) === '.md') {
files.push(fullPath);
}
} }
return 'article';
} }
return files; function readNestedMarkdownFiles(dir, files = []) {
} const dirEnts = fs.readdirSync(dir, { withFileTypes: true });
const files = readNestedMarkdownFiles(roadmapDir); for (const dirent of dirEnts) {
const fullPath = path.join(dir, dirent.name);
// for each of the files, assign the type of link to the beginning of each markdown link if (dirent.isDirectory()) {
// i.e. - [@article@abc](xyz) where @article@ is the type of link. Possible types: readNestedMarkdownFiles(fullPath, files);
// - @article@ } else {
// - @course@ if (path.extname(fullPath) === '.md') {
// - @opensource@ files.push(fullPath);
// - @podcast@
// - @video@
// - @website@
files.forEach((file) => {
const content = fs.readFileSync(file, 'utf-8');
const lines = content.split('\n');
const newContent = lines
.map((line) => {
if (line.startsWith('- [')) {
const type = line.match(/@(\w+)@/);
if (type) {
return line;
} }
}
}
let urlMatches = line.match(/\((https?:\/\/[^)]+)\)/); return files;
let fullUrl = urlMatches?.[1]; }
if (!fullUrl) { const files = readNestedMarkdownFiles(roadmapDir);
// is it slashed URL i.e. - [abc](/xyz)
fullUrl = line.match(/\((\/[^)]+)\)/)?.[1]; // for each of the files, assign the type of link to the beginning of each markdown link
if (fullUrl) { // i.e. - [@article@abc](xyz) where @article@ is the type of link. Possible types:
fullUrl = `https://roadmap.sh${fullUrl}`; // - @article@
// - @course@
// - @opensource@
// - @podcast@
// - @video@
// - @website@
files.forEach((file) => {
const content = fs.readFileSync(file, 'utf-8');
const lines = content.split('\n');
const newContent = lines
.map((line) => {
if (line.startsWith('- [')) {
const type = line.match(/@(\w+)@/);
if (type) {
return line;
} }
let urlMatches = line.match(/\((https?:\/\/[^)]+)\)/);
let fullUrl = urlMatches?.[1];
if (!fullUrl) { if (!fullUrl) {
console.error('No URL found in line:', line); // is it slashed URL i.e. - [abc](/xyz)
return; fullUrl = line.match(/\((\/[^)]+)\)/)?.[1];
if (fullUrl) {
fullUrl = `https://roadmap.sh${fullUrl}`;
}
if (!fullUrl) {
console.error('No URL found in line:', line);
return;
}
} }
}
const url = new URL(fullUrl); const url = new URL(fullUrl);
const hostname = url.hostname; const hostname = url.hostname;
let urlType = getTypeFromHostname(hostname, file, roadmapId); let urlType = getTypeFromHostname(hostname, file, roadmapId);
const linkText = line.match(/\[([^\]]+)\]/)[1]; const linkText = line.match(/\[([^\]]+)\]/)[1];
if ( if (
linkText.toLowerCase().startsWith('visit dedicated') && linkText.toLowerCase().startsWith('visit dedicated') &&
linkText.toLowerCase().endsWith('roadmap') linkText.toLowerCase().endsWith('roadmap')
) { ) {
urlType = 'roadmap'; urlType = 'roadmap';
} }
return line.replace('- [', `- [@${urlType}@`).replace('](', ']('); return line.replace('- [', `- [@${urlType}@`).replace('](', '](');
} }
return line; return line;
}) })
.join('\n'); .join('\n');
fs.writeFileSync(file, newContent); fs.writeFileSync(file, newContent);
});
}); });

Loading…
Cancel
Save