Update topic assignment

pull/5821/head
Kamran Ahmed 6 months ago
parent 458ae33eec
commit dfef66f4b5
  1. 298
      scripts/assign-label-types.cjs

@ -1,184 +1,188 @@
const fs = require('node:fs');
const path = require('node:path');
const roadmapId = 'frontend';
const roadmapDir = path.join(
__dirname,
`../src/data/roadmaps/${roadmapId}/content`,
const allRoadmapDirs = fs.readdirSync(
path.join(__dirname, '../src/data/roadmaps'),
);
/**
 * Strips the last dot-separated label (the TLD) from a hostname.
 *
 * @param {string} hostname - Hostname such as `github.com`.
 * @returns {string} The hostname without its final label, e.g. `github` for
 *   `github.com` and `docs.github` for `docs.github.com`. A hostname that
 *   contains no dot is returned unchanged.
 */
function getHostNameWithoutTld(hostname) {
  const parts = hostname.split('.');

  // A single-label hostname (e.g. `localhost`) has no TLD to drop. Returning
  // an empty string here would match any string in an `includes` check.
  if (parts.length < 2) {
    return hostname;
  }

  return parts.slice(0, -1).join('.');
}
allRoadmapDirs.forEach((roadmapId) => {
const roadmapDir = path.join(
__dirname,
`../src/data/roadmaps/${roadmapId}/content`,
);
function isOfficialWebsite(hostname, fileName, roadmapId) {
fileName = fileName.replace('/index.md', '').replace('.md', '');
/**
 * Strips the last dot-separated label (the TLD) from a hostname.
 *
 * @param {string} hostname - Hostname such as `github.com`.
 * @returns {string} The hostname without its final label, e.g. `github` for
 *   `github.com` and `docs.github` for `docs.github.com`. A hostname that
 *   contains no dot is returned unchanged.
 */
function getHostNameWithoutTld(hostname) {
  const parts = hostname.split('.');

  // A single-label hostname (e.g. `localhost`) has no TLD to drop. Returning
  // an empty string here would match any string in an `includes` check.
  if (parts.length < 2) {
    return hostname;
  }

  return parts.slice(0, -1).join('.');
}
const parts = fileName.split('/');
const lastPart = parts[parts.length - 1];
function isOfficialWebsite(hostname, fileName, roadmapId) {
fileName = fileName.replace('/index.md', '').replace('.md', '');
const normalizedFilename = lastPart.replace(/\d+/g, '').replace(/-/g, '');
const normalizedHostname = getHostNameWithoutTld(hostname);
const parts = fileName.split('/');
const lastPart = parts[parts.length - 1];
if (normalizedFilename === normalizedHostname) {
return true;
}
if (normalizedFilename.includes(normalizedHostname)) {
return true;
}
const normalizedFilename = lastPart.replace(/\d+/g, '').replace(/-/g, '');
const normalizedHostname = getHostNameWithoutTld(hostname);
return !!roadmapId.includes(normalizedHostname);
}
// Websites are educational websites of one of the following types:
// - @official@
// - @article@
// - @course@
// - @opensource@
// - @podcast@
// - @video@
// - @website@
// Content consists only of educational websites.
function getTypeFromHostname(hostname, fileName, roadmapId) {
hostname = hostname.replace('www.', '');
const videoHostnames = ['youtube.com', 'vimeo.com', 'youtu.be'];
const courseHostnames = ['coursera.org', 'udemy.com', 'edx.org'];
const podcastHostnames = ['spotify.com', 'apple.com'];
const opensourceHostnames = ['github.com', 'gitlab.com'];
const articleHostnames = [
'neilpatel.com',
'learningseo.io',
'htmlreference.io',
'docs.gitlab.com',
'docs.github.com',
'skills.github.com',
'cloudflare.com',
'w3schools.com',
'medium.com',
'dev.to',
'web.dev',
'css-tricks.com',
'developer.mozilla.org',
'smashingmagazine.com',
'freecodecamp.org',
'cs.fyi',
'thenewstack.io',
'html5rocks.com',
'html.com',
'javascript.info',
'css-tricks.com',
'developer.apple.com',
];
if (articleHostnames.includes(hostname)) {
return 'article';
}
if (normalizedFilename === normalizedHostname) {
return true;
}
if (videoHostnames.includes(hostname)) {
return 'video';
}
if (normalizedFilename.includes(normalizedHostname)) {
return true;
}
if (courseHostnames.includes(hostname)) {
return 'course';
return !!roadmapId.includes(normalizedHostname);
}
if (podcastHostnames.includes(hostname)) {
return 'podcast';
}
// Websites are educational websites of one of the following types:
// - @official@
// - @article@
// - @course@
// - @opensource@
// - @podcast@
// - @video@
// - @website@
// Content consists only of educational websites.
function getTypeFromHostname(hostname, fileName, roadmapId) {
hostname = hostname.replace('www.', '');
const videoHostnames = ['youtube.com', 'vimeo.com', 'youtu.be'];
const courseHostnames = ['coursera.org', 'udemy.com', 'edx.org'];
const podcastHostnames = ['spotify.com', 'apple.com'];
const opensourceHostnames = ['github.com', 'gitlab.com'];
const articleHostnames = [
'neilpatel.com',
'learningseo.io',
'htmlreference.io',
'docs.gitlab.com',
'docs.github.com',
'skills.github.com',
'cloudflare.com',
'w3schools.com',
'medium.com',
'dev.to',
'web.dev',
'css-tricks.com',
'developer.mozilla.org',
'smashingmagazine.com',
'freecodecamp.org',
'cs.fyi',
'thenewstack.io',
'html5rocks.com',
'html.com',
'javascript.info',
'css-tricks.com',
'developer.apple.com',
];
if (articleHostnames.includes(hostname)) {
return 'article';
}
if (opensourceHostnames.includes(hostname)) {
return 'opensource';
}
if (videoHostnames.includes(hostname)) {
return 'video';
}
if (hostname === 'roadmap.sh') {
return 'roadmap.sh';
}
if (courseHostnames.includes(hostname)) {
return 'course';
}
if (isOfficialWebsite(hostname, fileName, roadmapId)) {
return 'official';
}
if (podcastHostnames.includes(hostname)) {
return 'podcast';
}
return 'article';
}
if (opensourceHostnames.includes(hostname)) {
return 'opensource';
}
function readNestedMarkdownFiles(dir, files = []) {
const dirEnts = fs.readdirSync(dir, { withFileTypes: true });
if (hostname === 'roadmap.sh') {
return 'roadmap.sh';
}
for (const dirent of dirEnts) {
const fullPath = path.join(dir, dirent.name);
if (dirent.isDirectory()) {
readNestedMarkdownFiles(fullPath, files);
} else {
if (path.extname(fullPath) === '.md') {
files.push(fullPath);
}
if (isOfficialWebsite(hostname, fileName, roadmapId)) {
return 'official';
}
return 'article';
}
return files;
}
const files = readNestedMarkdownFiles(roadmapDir);
// For each file, prefix the text of every markdown list link with its link type,
// e.g. - [@article@abc](xyz), where @article@ is the type of link. Possible types:
// - @article@
// - @course@
// - @opensource@
// - @podcast@
// - @video@
// - @website@
files.forEach((file) => {
const content = fs.readFileSync(file, 'utf-8');
const lines = content.split('\n');
const newContent = lines
.map((line) => {
if (line.startsWith('- [')) {
const type = line.match(/@(\w+)@/);
if (type) {
return line;
function readNestedMarkdownFiles(dir, files = []) {
const dirEnts = fs.readdirSync(dir, { withFileTypes: true });
for (const dirent of dirEnts) {
const fullPath = path.join(dir, dirent.name);
if (dirent.isDirectory()) {
readNestedMarkdownFiles(fullPath, files);
} else {
if (path.extname(fullPath) === '.md') {
files.push(fullPath);
}
}
}
let urlMatches = line.match(/\((https?:\/\/[^)]+)\)/);
let fullUrl = urlMatches?.[1];
return files;
}
if (!fullUrl) {
// is it slashed URL i.e. - [abc](/xyz)
fullUrl = line.match(/\((\/[^)]+)\)/)?.[1];
if (fullUrl) {
fullUrl = `https://roadmap.sh${fullUrl}`;
const files = readNestedMarkdownFiles(roadmapDir);
// For each file, prefix the text of every markdown list link with its link type,
// e.g. - [@article@abc](xyz), where @article@ is the type of link. Possible types:
// - @article@
// - @course@
// - @opensource@
// - @podcast@
// - @video@
// - @website@
files.forEach((file) => {
const content = fs.readFileSync(file, 'utf-8');
const lines = content.split('\n');
const newContent = lines
.map((line) => {
if (line.startsWith('- [')) {
const type = line.match(/@(\w+)@/);
if (type) {
return line;
}
let urlMatches = line.match(/\((https?:\/\/[^)]+)\)/);
let fullUrl = urlMatches?.[1];
if (!fullUrl) {
console.error('No URL found in line:', line);
return;
// is it slashed URL i.e. - [abc](/xyz)
fullUrl = line.match(/\((\/[^)]+)\)/)?.[1];
if (fullUrl) {
fullUrl = `https://roadmap.sh${fullUrl}`;
}
if (!fullUrl) {
console.error('No URL found in line:', line);
return;
}
}
}
const url = new URL(fullUrl);
const hostname = url.hostname;
const url = new URL(fullUrl);
const hostname = url.hostname;
let urlType = getTypeFromHostname(hostname, file, roadmapId);
const linkText = line.match(/\[([^\]]+)\]/)[1];
let urlType = getTypeFromHostname(hostname, file, roadmapId);
const linkText = line.match(/\[([^\]]+)\]/)[1];
if (
linkText.toLowerCase().startsWith('visit dedicated') &&
linkText.toLowerCase().endsWith('roadmap')
) {
urlType = 'roadmap';
}
if (
linkText.toLowerCase().startsWith('visit dedicated') &&
linkText.toLowerCase().endsWith('roadmap')
) {
urlType = 'roadmap';
}
return line.replace('- [', `- [@${urlType}@`).replace('](', '](');
}
return line.replace('- [', `- [@${urlType}@`).replace('](', '](');
}
return line;
})
.join('\n');
return line;
})
.join('\n');
fs.writeFileSync(file, newContent);
fs.writeFileSync(file, newContent);
});
});

Loading…
Cancel
Save