developer-roadmap/public/roadmap-content/mlops.json

{
  "_7uvOebQUI4xaSwtMjpEd": {
    "title": "Programming Fundamentals",
    "description": "Programming is the key requirement for MLOps. You need to be proficient in atleast one programming language. Python is the most popular language for MLOps.",
    "links": []
  },
  "Vh81GnOUOZvDOlOyI5PwT": {
    "title": "Python",
    "description": "Python is an interpreted high-level general-purpose programming language. Its design philosophy emphasizes code readability with its significant use of indentation. Its language constructs as well as its object-oriented approach aim to help programmers write clear, logical code for small and large-scale projects. Python is dynamically-typed and garbage-collected. It supports multiple programming paradigms, including structured (particularly, procedural), object-oriented and functional programming. Python is often described as a \"batteries included\" language due to its comprehensive standard library.\n\nTo start learning Python, here are some useful resources:\n\nRemember, practice is key, and the more you work with Python, the more you'll appreciate its utility in the world of cyber security.",
    "links": [
      {
        "title": "Python.org",
        "url": "https://www.python.org/",
        "type": "article"
      },
      {
        "title": "Real Python",
        "url": "https://realpython.com/",
        "type": "article"
      },
      {
        "title": "Automate the Boring Stuff with Python",
        "url": "https://automatetheboringstuff.com/",
        "type": "article"
      },
      {
        "title": "Explore top posts about Python",
        "url": "https://app.daily.dev/tags/python?ref=roadmapsh",
        "type": "article"
      }
    ]
  },
  "vdVq3RQvQF3mF8PQc6DMg": {
    "title": "Go",
    "description": "Go is an open source programming language supported by Google. Go can be used to write cloud services, CLI tools, used for API development, and much more.\n\nVisit the following resources to learn more:",
    "links": [
      {
        "title": "Visit Dedicated Go Roadmap",
        "url": "/golang",
        "type": "article"
      },
      {
        "title": "A Tour of Go – Go Basics",
        "url": "https://go.dev/tour/welcome/1",
        "type": "article"
      },
      {
        "title": "Go Reference Documentation",
        "url": "https://go.dev/doc/",
        "type": "article"
      },
      {
        "title": "Go by Example - annotated example programs",
        "url": "https://gobyexample.com/",
        "type": "article"
      },
      {
        "title": "W3Schools Go Tutorial ",
        "url": "https://www.w3schools.com/go/",
        "type": "article"
      },
      {
        "title": "Making a RESTful JSON API in Go",
        "url": "https://thenewstack.io/make-a-restful-json-api-go/",
        "type": "article"
      },
      {
        "title": "Go, the Programming Language of the Cloud",
        "url": "https://thenewstack.io/go-the-programming-language-of-the-cloud/",
        "type": "article"
      },
      {
        "title": "Explore top posts about Golang",
        "url": "https://app.daily.dev/tags/golang?ref=roadmapsh",
        "type": "article"
      },
      {
        "title": "Go Class by Matt",
        "url": "https://www.youtube.com/playlist?list=PLoILbKo9rG3skRCj37Kn5Zj803hhiuRK6",
        "type": "video"
      }
    ]
  },
  "mMzqJF2KQ49TDEk5F3VAI": {
    "title": "Bash",
    "description": "Understanding bash is essential for MLOps tasks.\n\n*   **Book Suggestion:** _The Linux Command Line, 2nd Edition_ by William E. Shotts",
    "links": []
  },
  "oUhlUoWQQ1txx_sepD5ev": {
    "title": "Version Control Systems",
    "description": "Version control/source control systems allow developers to track and control changes to code over time. These services often include the ability to make atomic revisions to code, branch/fork off of specific points, and to compare versions of code. They are useful in determining the who, what, when, and why code changes were made.\n\nVisit the following resources to learn more:",
    "links": [
      {
        "title": "Git",
        "url": "https://git-scm.com/",
        "type": "article"
      },
      {
        "title": "What is Version Control?",
        "url": "https://www.atlassian.com/git/tutorials/what-is-version-control",
        "type": "article"
      }
    ]
  },
  "06T5CbZAGJU6fJhCmqCC8": {
    "title": "Git",
    "description": "[Git](https://git-scm.com/) is a free and open source distributed version control system designed to handle everything from small to very large projects with speed and efficiency.\n\nVisit the following resources to learn more:",
    "links": [
      {
        "title": "Learn Git with Tutorials, News and Tips - Atlassian",
        "url": "https://www.atlassian.com/git",
        "type": "article"
      },
      {
        "title": "Git Cheat Sheet",
        "url": "https://cs.fyi/guide/git-cheatsheet",
        "type": "article"
      },
      {
        "title": "Explore top posts about Git",
        "url": "https://app.daily.dev/tags/git?ref=roadmapsh",
        "type": "article"
      },
      {
        "title": "Git & GitHub Crash Course For Beginners",
        "url": "https://www.youtube.com/watch?v=SWYqp7iY_Tc",
        "type": "video"
      }
    ]
  },
  "7t7jSb3YgyWlhgCe8Se1I": {
    "title": "GitHub",
    "description": "GitHub is a provider of Internet hosting for software development and version control using Git. It offers the distributed version control and source code management functionality of Git, plus its own features.\n\nVisit the following resources to learn more:",
    "links": [
      {
        "title": "GitHub Website",
        "url": "https://github.com",
        "type": "opensource"
      },
      {
        "title": "GitHub Documentation",
        "url": "https://docs.github.com/en/get-started/quickstart",
        "type": "article"
      },
      {
        "title": "How to Use Git in a Professional Dev Team",
        "url": "https://ooloo.io/project/github-flow",
        "type": "article"
      },
      {
        "title": "Learn Git Branching",
        "url": "https://learngitbranching.js.org/?locale=en_us",
        "type": "article"
      },
      {
        "title": "Explore top posts about GitHub",
        "url": "https://app.daily.dev/tags/github?ref=roadmapsh",
        "type": "article"
      },
      {
        "title": "What is GitHub?",
        "url": "https://www.youtube.com/watch?v=w3jLJU7DT5E",
        "type": "video"
      },
      {
        "title": "Git vs. GitHub: Whats the difference?",
        "url": "https://www.youtube.com/watch?v=wpISo9TNjfU",
        "type": "video"
      },
      {
        "title": "Git and GitHub for Beginners",
        "url": "https://www.youtube.com/watch?v=RGOj5yH7evk",
        "type": "video"
      },
      {
        "title": "Git and GitHub - CS50 Beyond 2019",
        "url": "https://www.youtube.com/watch?v=eulnSXkhE7I",
        "type": "video"
      }
    ]
  },
  "00GZcwe25QYi7rDzaOoMt": {
    "title": "Cloud Computing",
    "description": "**Cloud Computing** refers to the delivery of computing services over the internet rather than using local servers or personal devices. These services include servers, storage, databases, networking, software, analytics, and intelligence. Cloud Computing enables faster innovation, flexible resources, and economies of scale. There are various types of cloud computing such as public clouds, private clouds, and hybrids clouds. Furthermore, it's divided into different services like Infrastructure as a Service (IaaS), Platform as a Service (PaaS), and Software as a Service (SaaS). These services differ mainly in the level of control an organization has over their data and infrastructures.",
    "links": []
  },
  "u3E7FGW4Iwdsu61KYFxCX": {
    "title": "AWS / Azure / GCP",
    "description": "AWS (Amazon Web Services) Azure and GCP (Google Cloud Platform) are three leading providers of cloud computing services. AWS by Amazon is the oldest and the most established among the three, providing a breadth and depth of solutions ranging from infrastructure services like compute, storage, and databases to the machine and deep learning. Azure, by Microsoft, has integrated tools for DevOps, supports a large number of programming languages, and offers seamless integration with on-prem servers and Microsoft’s software. Google's GCP has strength in cost-effectiveness, live migration of virtual machines, and flexible computing options. All three have introduced various MLOps tools and services to boost capabilities for machine learning development and operations.\n\nVisit the following resources to learn more about AWS, Azure, and GCP:",
    "links": [
      {
        "title": "AWS Roadmap",
        "url": "https://roadmap.sh/aws",
        "type": "article"
      },
      {
        "title": "Azure Tutorials",
        "url": "https://docs.microsoft.com/en-us/learn/azure/",
        "type": "article"
      },
      {
        "title": "GCP Learning Resources",
        "url": "https://cloud.google.com/training",
        "type": "article"
      },
      {
        "title": "Explore top posts about AWS",
        "url": "https://app.daily.dev/tags/aws?ref=roadmapsh",
        "type": "article"
      }
    ]
  },
  "kbfucfIO5KCsuv3jKbHTa": {
    "title": "Cloud-native ML Services",
    "description": "Most of the cloud providers offer managed services for machine learning. These services are designed to help data scientists and machine learning engineers to build, train, and deploy machine learning models at scale. These services are designed to be cloud-native, meaning they are designed to work with other cloud services and are optimized for the cloud environment.\n\nHere are the services offered by the major cloud providers:\n\n*   **Amazon Web Services (AWS)**: SageMaker\n*   **Google Cloud Platform (GCP)**: AI Platform\n*   **Microsoft Azure**: Azure Machine Learning",
    "links": []
  },
  "tKeejLv8Q7QX40UtOjpav": {
    "title": "Containerization",
    "description": "Containers are a construct in which [cgroups](https://en.wikipedia.org/wiki/Cgroups), [namespaces](https://en.wikipedia.org/wiki/Linux_namespaces), and [chroot](https://en.wikipedia.org/wiki/Chroot) are used to fully encapsulate and isolate a process. This encapsulated process, called a container image, shares the kernel of the host with other containers, allowing containers to be significantly smaller and faster than virtual machines.\n\nThese images are designed for portability, allowing for full local testing of a static image, and easy deployment to a container management platform.\n\nVisit the following resources to learn more:",
    "links": [
      {
        "title": "What are Containers?",
        "url": "https://cloud.google.com/learn/what-are-containers",
        "type": "article"
      },
      {
        "title": "What is a Container?",
        "url": "https://www.docker.com/resources/what-container/",
        "type": "article"
      },
      {
        "title": "Articles about Containers - The New Stack",
        "url": "https://thenewstack.io/category/containers/",
        "type": "article"
      },
      {
        "title": "Explore top posts about Containers",
        "url": "https://app.daily.dev/tags/containers?ref=roadmapsh",
        "type": "article"
      },
      {
        "title": "What are Containers?",
        "url": "https://www.youtube.com/playlist?list=PLawsLZMfND4nz-WDBZIj8-nbzGFD4S9oz",
        "type": "video"
      }
    ]
  },
  "XIdCvT-4HyyglHJLRrHlz": {
    "title": "Docker",
    "description": "Docker is a platform for working with containerized applications. Among its features are a daemon and client for managing and interacting with containers, registries for storing images, and a desktop application to package all these features together.\n\nVisit the following resources to learn more:",
    "links": [
      {
        "title": "Docker Documentation",
        "url": "https://docs.docker.com/",
        "type": "article"
      },
      {
        "title": "Explore top posts about Docker",
        "url": "https://app.daily.dev/tags/docker?ref=roadmapsh",
        "type": "article"
      },
      {
        "title": "Docker Tutorial",
        "url": "https://www.youtube.com/watch?v=RqTEHSBrYFw",
        "type": "video"
      },
      {
        "title": "Docker simplified in 55 seconds",
        "url": "https://youtu.be/vP_4DlOH1G4",
        "type": "video"
      }
    ]
  },
  "XQoK9l-xtN2J8ZV8dw53X": {
    "title": "Kubernetes",
    "description": "Kubernetes is an [open source](https://github.com/kubernetes/kubernetes) container management platform, and the dominant product in this space. Using Kubernetes, teams can deploy images across multiple underlying hosts, defining their desired availability, deployment logic, and scaling logic in YAML. Kubernetes evolved from Borg, an internal Google platform used to provision and allocate compute resources (similar to the Autopilot and Aquaman systems of Microsoft Azure).\n\nThe popularity of Kubernetes has made it an increasingly important skill for the DevOps Engineer and has triggered the creation of Platform teams across the industry. These Platform engineering teams often exist with the sole purpose of making Kubernetes approachable and usable for their product development colleagues.\n\nVisit the following resources to learn more:",
    "links": [
      {
        "title": "Kubernetes Website",
        "url": "https://kubernetes.io/",
        "type": "article"
      },
      {
        "title": "Kubernetes Documentation",
        "url": "https://kubernetes.io/docs/home/",
        "type": "article"
      },
      {
        "title": "Primer: How Kubernetes Came to Be, What It Is, and Why You Should Care",
        "url": "https://thenewstack.io/primer-how-kubernetes-came-to-be-what-it-is-and-why-you-should-care/",
        "type": "article"
      },
      {
        "title": "Kubernetes: An Overview",
        "url": "https://thenewstack.io/kubernetes-an-overview/",
        "type": "article"
      },
      {
        "title": "Explore top posts about Kubernetes",
        "url": "https://app.daily.dev/tags/kubernetes?ref=roadmapsh",
        "type": "article"
      },
      {
        "title": "Kubernetes Crash Course for Absolute Beginners",
        "url": "https://www.youtube.com/watch?v=s_o8dwzRlu4",
        "type": "video"
      }
    ]
  },
  "ulka7VEVjz6ls5SnI6a6z": {
    "title": "Machine Learning Fundamentals",
    "description": "An MLOps engineer should have a basic understanding of machine learning models.\n\n*   **Courses:** [MLCourse.ai](https://mlcourse.ai/), [Fast.ai](https://course.fast.ai)\n*   **Book Suggestion:** _Applied Machine Learning and AI for Engineers_ by Jeff Prosise",
    "links": []
  },
  "VykbCu7LWIx8fQpqKzoA7": {
    "title": "Data Engineering Fundamentals",
    "description": "Data Engineering is essentially dealing with the collection, validation, storage, transformation, and processing of data. The objective is to provide reliable, efficient, and scalable data pipelines and infrastructure that allow data scientists to convert data into actionable insights. It involves steps like data ingestion, data storage, data processing, and data provisioning. Important concepts include designing, building, and maintaining data architecture, databases, processing systems, and large-scale processing systems. It is crucial to have extensive technical knowledge in various tools and programming languages like SQL, Python, Hadoop, and more.",
    "links": []
  },
  "cOg3ejZRYE-u-M0c89IjM": {
    "title": "Data Pipelines",
    "description": "Data pipelines refer to a set of processes that involve moving data from one system to another, for purposes such as data integration, data migration, data transformation, or data synchronization. These processes can involve a variety of data sources and destinations, and may often require data to be cleaned, enriched, or otherwise transformed along the way. It's a key concept in data engineering to ensure that data is appropriately processed from its source to the location where it will be used, typically a data warehouse, data mart, or a data lake. As such, data pipelines play a crucial part in building an effective and efficient data analytics setup, enabling the flow of data to be processed for insights.\n\nIt is important to understand the difference between ELT and ETL pipelines. ELT stands for Extract, Load, Transform, and refers to a process where data is first extracted from source systems, then loaded into a target system, and finally transformed within the target system. ETL, on the other hand, stands for Extract, Transform, Load, and refers to a process where data is first extracted from source systems, then transformed, and finally loaded into a target system. The choice between ELT and ETL pipelines depends on the specific requirements of the data processing tasks at hand, and the capabilities of the systems involved.",
    "links": []
  },
  "wOogVDV4FIDLXVPwFqJ8C": {
    "title": "Data Lakes & Warehouses",
    "description": "\"**Data Lakes** are large-scale data repository systems that store raw, untransformed data, in various formats, from multiple sources. They're often used for big data and real-time analytics requirements. Data lakes preserve the original data format and schema which can be modified as necessary. On the other hand, **Data Warehouses** are data storage systems which are designed for analyzing, reporting and integrating with transactional systems. The data in a warehouse is clean, consistent, and often transformed to meet wide-range of business requirements. Hence, data warehouses provide structured data but require more processing and management compared to data lakes.\"",
    "links": []
  },
  "Berd78HvnulNEGOsHCf8n": {
    "title": "Data Ingestion Architecture",
    "description": "Data ingestion is the process of collecting, transferring, and loading data from various sources to a destination where it can be stored and analyzed. There are several data ingestion architectures that can be used to collect data from different sources and load it into a data warehouse, data lake, or other storage systems. These architectures can be broadly classified into two categories: batch processing and real-time processing. How you choose to ingest data will depend on the volume, velocity, and variety of data you are working with, as well as the latency requirements of your use case.\n\nLambda and Kappa architectures are two popular data ingestion architectures that combine batch and real-time processing to handle large volumes of data efficiently.",
    "links": []
  },
  "pVSlVHXIap0unFxLGM-lQ": {
    "title": "Airflow",
    "description": "Airflow is a platform to programmatically author, schedule and monitor workflows. Use airflow to author workflows as directed acyclic graphs (DAGs) of tasks. The airflow scheduler executes your tasks on an array of workers while following the specified dependencies. Rich command line utilities make performing complex surgeries on DAGs a snap. The rich user interface makes it easy to visualize pipelines running in production, monitor progress, and troubleshoot issues when needed. When workflows are defined as code, they become more maintainable, versionable, testable, and collaborative.\n\nVisit the following resources to learn more:",
    "links": [
      {
        "title": "Airflow website",
        "url": "https://airflow.apache.org/",
        "type": "article"
      },
      {
        "title": "Explore top posts about Apache Airflow",
        "url": "https://app.daily.dev/tags/apache-airflow?ref=roadmapsh",
        "type": "article"
      }
    ]
  },
  "UljuqA89_SlCSDWWMD_C_": {
    "title": "Spark",
    "description": "Apache Spark is an open-source distributed computing system designed for big data processing and analytics. It offers a unified interface for programming entire clusters, enabling efficient handling of large-scale data with built-in support for data parallelism and fault tolerance. Spark excels in processing tasks like batch processing, real-time data streaming, machine learning, and graph processing. It’s known for its speed, ease of use, and ability to process data in-memory, significantly outperforming traditional MapReduce systems. Spark is widely used in big data ecosystems for its scalability and versatility across various data processing tasks.\n\nVisit the following resources to learn more:",
    "links": [
      {
        "title": "ApacheSpark",
        "url": "https://spark.apache.org/documentation.html",
        "type": "article"
      },
      {
        "title": "Spark By Examples",
        "url": "https://sparkbyexamples.com",
        "type": "article"
      },
      {
        "title": "Explore top posts about Apache Spark",
        "url": "https://app.daily.dev/tags/spark?ref=roadmapsh",
        "type": "article"
      }
    ]
  },
  "fMNwzhgLgHlAZJ9NvKikR": {
    "title": "Kafka",
    "description": "Apache Kafka is an open-source distributed event streaming platform used by thousands of companies for high-performance data pipelines, streaming analytics, data integration, and mission-critical applications.\n\nVisit the following resources to learn more:",
    "links": [
      {
        "title": "Apache Kafka quickstart",
        "url": "https://kafka.apache.org/quickstart",
        "type": "article"
      },
      {
        "title": "Explore top posts about Kafka",
        "url": "https://app.daily.dev/tags/kafka?ref=roadmapsh",
        "type": "article"
      },
      {
        "title": "Apache Kafka Fundamentals",
        "url": "https://www.youtube.com/watch?v=B5j3uNBH8X4",
        "type": "video"
      }
    ]
  },
  "o6GQ3-8DgDtHzdX6yeg1w": {
    "title": "Flink",
    "description": "Apache Flink is a distributed stream processing framework that is used to process large amounts of data in real-time. It is designed to be highly scalable and fault-tolerant. Flink is built on top of the Apache Kafka messaging system and is used to process data streams in real-time.\n\nVisit the following resources to learn more:",
    "links": [
      {
        "title": "Apache Flink Documentation",
        "url": "https://flink.apache.org/",
        "type": "article"
      },
      {
        "title": "Explore top posts about Apache Flink",
        "url": "https://app.daily.dev/tags/apache-flink?ref=roadmapsh",
        "type": "article"
      }
    ]
  },
  "iTsEHVCo6KGq7H2HMgy5S": {
    "title": "MLOps Principles",
    "description": "Awareness of MLOps principles and maturity factors is required.\n\n*   **Books:**\n    *   _Designing Machine Learning Systems_ by Chip Huyen\n    *   _Introducing MLOps_ by Mark Treveil and Dataiku\n*   **Assessment:** [MLOps maturity assessment](https://marvelousmlops.substack.com/p/mlops-maturity-assessment)\n*   **Great resource on MLOps:** [ml-ops.org](https://ml-ops.org)",
    "links": []
  },
  "l1xasxQy2vAY34NWaqKEe": {
    "title": "MLOps Components",
    "description": "MLOps components can be broadly classified into three major categories: Development, Operations and Governance. The **Development** components include everything involved in the creation of machine learning models, such as data extraction, data analysis, feature engineering, and machine learning model training. The **Operations** category includes components involved in deploying, monitoring, and maintaining machine learning models in production. This may include release management, model serving, and performance monitoring. Lastly, the **Governance** category encompasses the policies and regulations related to machine learning models. This includes model audit and tracking, model explainability, and security & compliance regulations.",
    "links": []
  },
  "kHDSwlSq8WkLey4EJIQSR": {
    "title": "Version Control",
    "description": "Version control/source control systems allow developers to track and control changes to code over time. These services often include the ability to make atomic revisions to code, branch/fork off of specific points, and to compare versions of code. They are useful in determining the who, what, when, and why code changes were made.\n\nVisit the following resources to learn more:",
    "links": [
      {
        "title": "Git",
        "url": "https://git-scm.com/",
        "type": "article"
      },
      {
        "title": "What is Version Control?",
        "url": "https://www.atlassian.com/git/tutorials/what-is-version-control",
        "type": "article"
      },
      {
        "title": "Explore top posts about Version Control",
        "url": "https://app.daily.dev/tags/version-control?ref=roadmapsh",
        "type": "article"
      }
    ]
  },
  "a6vawajw7BpL6plH_nuAz": {
    "title": "CI/CD",
    "description": "Critical for traceable and reproducible ML model deployments.\n\n*   **Books:**\n    *   _Learning GitHub Actions_ by Brent Laster\n    *   _Learning Git_ by Anna Skoulikari\n*   **Tutorials & Courses:** [Git & GitHub for beginners](https://www.youtube.com/watch?v=RGOj5yH7evk), [Python to Production guide](https://www.udemy.com/course/setting-up-the-linux-terminal-for-software-development/), [Version Control Missing Semester](https://missing.csail.mit.edu/2020/version-control/), [https://learngitbranching.js.org/](https://learngitbranching.js.org/)\n*   **Tool:** [Pre-commit hooks](https://marvelousmlops.substack.com/p/welcome-to-pre-commit-heaven)",
    "links": []
  },
  "fes7M--Y8i08_zeP98tVV": {
    "title": "Orchestration",
    "description": "Systems like Airflow and Mage are important in ML engineering.\n\n*   **Course:** [Introduction to Airflow in Python](https://app.datacamp.com/learn/courses/introduction-to-airflow-in-python)\n*   **Note:** Airflow is also featured in the _ML Engineering with Python_ book and [_The Full Stack 7-Steps MLOps Framework_](https://www.pauliusztin.me/courses/the-full-stack-7-steps-mlops-framework).",
    "links": []
  },
  "fGGWKmAJ50Ke6wWJBEgby": {
    "title": "Experiment Tracking & Model Registry",
    "description": "**Experiment Tracking** is an essential part of MLOps, providing a system to monitor and record the different experiments conducted during the machine learning model development process. This involves capturing, organizing and visualizing the metadata associated with each experiment, such as hyperparameters used, models produced, metrics like accuracy or loss, and other information about the computational environment. This tracking allows for reproducibility of experiments, comparison across different experiment runs, and helps in identifying the best models.\n\nLogging metadata, parameters, and artifacts of training runs.\n\n*   **Tool:** MLflow\n*   **Courses:** [MLflow Udemy course](https://www.udemy.com/course/mlflow-course/), [End-to-end machine learning (MLflow piece)](https://www.udemy.com/course/sustainable-and-scalable-machine-learning-project-development/)",
    "links": []
  },
  "6XgP_2NLuiw654zvTyueT": {
    "title": "Data Lineage & Feature Stores",
    "description": "**Data Lineage** refers to the life-cycle of data, including its origins, movements, characteristics and quality. It's a critical component in MLOps for tracking the journey of data through every process in a pipeline, from raw input to model output. Data lineage helps in maintaining transparency, ensuring compliance, and facilitating data debugging or tracing data related bugs. It provides a clear representation of data sources, transformations, and dependencies thereby aiding in audits, governance, or reproduction of machine learning models.\n\nFeature stores are a crucial component of MLOps infrastructure.\n\n*   **Tutorial:** Creating a feature store with Feast [Part 1](https://kedion.medium.com/creating-a-feature-store-with-feast-part-1-37c380223e2f) [Part 2](https://kedion.medium.com/feature-storage-for-ml-with-feast-part-2-34df1971a8d3) [Part 3](https://kedion.medium.com/feature-storage-for-ml-with-feast-a061899fc4a2)\n*   **Tool:** DVC for data tracking\n*   **Course:** [End-to-end machine learning (DVC piece)](https://www.udemy.com/course/sustainable-and-scalable-machine-learning-project-development/)",
    "links": []
  },
  "zsW1NRb0dMgS-KzWsI0QU": {
    "title": "Model Training & Serving",
    "description": "\"Model Training\" refers to the phase in the Machine Learning (ML) pipeline where we teach a machine learning model how to make predictions by providing it with data. This process begins with feeding the model a training dataset, which it uses to learn and understand patterns or perform computations. The model's performance is then evaluated by comparing its prediction outputs with the actual results. Various algorithms can be used in the model training process. The choice of algorithm usually depends on the task, the data available, and the requirements of the project. It is worth noting that the model training stage can be computationally expensive particularly when dealing with large datasets or complex models.\n\nDecisions depend on the organization's infrastructure.\n\n*   **Repository Suggestion:** [ML Deployment k8s Fast API](https://github.com/sayakpaul/ml-deployment-k8s-fastapi/tree/main)\n*   **Tutorial Suggestions:** [ML deployment with k8s FastAPI, Building an ML app with FastAPI](https://dev.to/bravinsimiyu/beginner-guide-on-how-to-build-a-machine-learning-app-with-fastapi-part-ii-deploying-the-fastapi-application-to-kubernetes-4j6g), [Basic Kubeflow pipeline](https://towardsdatascience.com/tutorial-basic-kubeflow-pipeline-from-scratch-5f0350dc1905), [Building and deploying ML pipelines](https://www.datacamp.com/tutorial/kubeflow-tutorial-building-and-deploying-machine-learning-pipelines?utm_source=google&utm_medium=paid_search&utm_campaignid=19589720818&utm_adgroupid=157156373991&utm_device=c&utm_keyword=&utm_matchtype=&utm_network=g&utm_adpostion=&utm_creative=683184494153&utm_targetid=dsa-2218886984380&utm_loc_interest_ms=&utm_loc_physical_ms=9064564&utm_content=&utm_campaign=230119_1-sea~dsa~tofu_2-b2c_3-eu_4-prc_5-na_6-na_7-le_8-pdsh-go_9-na_10-na_11-na-dec23&gad_source=1&gclid=Cj0KCQiA4Y-sBhC6ARIsAGXF1g7iSih9h2RGL27LwWY6dlPLhEss-e5Af8pnaBvdDynRh7IHIKi8sGgaApD-EALw_wcB), [KServe tutorial](https://towardsdatascience.com/kserve-highly-scalable-machine-learning-deployment-with-kubernetes-aa7af0b71202)",
    "links": []
  },
  "r4fbUwD83uYumEO1X8f09": {
    "title": "Monitoring & Observability",
    "description": "**Monitoring** in MLOps primarily involves tracking the performance of machine learning (ML) models in production to ensure that they continually deliver accurate and reliable results. Such monitoring is necessary because the real-world data that these models handle may change over time, a scenario known as data drift. These changes can adversely affect model performance. Monitoring helps to detect any anomalies in the model’s behaviour or performance and such alerts can trigger the retraining of models with new data. From a broader perspective, monitoring also involves tracking resources and workflows to detect and rectify any operational issues in the MLOps pipeline.",
    "links": [
      {
        "title": "ML Monitoring vs Observability article",
        "url": "https://marvelousmlops.substack.com/p/ml-monitoring-vs-ml-observability",
        "type": "article"
      },
      {
        "title": "Machine learning monitoring concepts",
        "url": "https://app.datacamp.com/learn/courses/machine-learning-monitoring-concepts",
        "type": "article"
      },
      {
        "title": "Monitoring ML in Python",
        "url": "https://app.datacamp.com/learn/courses/monitoring-machine-learning-in-python",
        "type": "article"
      },
      {
        "title": "Prometheus, Grafana",
        "url": "https://www.udemy.com/course/mastering-prometheus-and-grafana/",
        "type": "article"
      }
    ]
  },
  "sf67bSL7HAx6iN7S6MYKs": {
    "title": "Infrastructure as Code",
    "description": "Essential for a reproducible MLOps framework.\n\n*   **Course:** [Terraform course for beginners](https://www.youtube.com/watch?v=SLB_c_ayRMo)\n*   **Video:** [8 Terraform best practices by Techworld by Nana](https://www.youtube.com/watch?v=gxPykhPxRW0)\n*   **Book Suggestion:** _Terraform: Up and Running, 3rd Edition_ by Yevgeniy Brikman",
    "links": []
  }
}