Privacy Engineering


Introduction

Privacy engineering integrates data protection principles into system architecture from the earliest design stages. Rather than treating privacy as an afterthought or compliance checkbox, privacy engineering embeds controls into the fabric of software systems. This approach aligns with the "privacy by design" framework and regulatory requirements like GDPR and CCPA.

Privacy by Design

Privacy by Design (PbD) is a framework developed by the Information and Privacy Commissioner of Ontario, articulated through seven foundational principles.

The Seven Principles


1. **Proactive not Reactive**: Prevent privacy risks before they occur rather than remediating them after the fact
2. **Privacy as Default**: Personal data is automatically protected without user action
3. **Privacy Embedded into Design**: Privacy is integral to the system, not bolted on
4. **Full Functionality**: Privacy does not sacrifice functionality — positive-sum, not zero-sum
5. **End-to-End Security**: Full lifecycle protection from collection to destruction
6. **Visibility and Transparency**: Processes are open, accountable, and auditable
7. **Respect for User Privacy**: User-centric design with strong defaults and clear notices




# Privacy by design: data minimization example


class UserRegistrationService:
    """Example registration service that collects minimal data and defaults to private.

    NOTE(review): ``User``, ``PrivacySettings`` and ``self.hash_password`` are
    not defined in this snippet; they are expected to be supplied by the
    surrounding application.
    """

    def register_minimal(self, email, password):
        """Create a user from only the data needed to operate an account.

        Data minimization is how "Privacy Embedded into Design" (Principle 3)
        shows up in a registration flow: fields that are never collected
        cannot leak or be misused later.

        Args:
            email: Address used to identify the account.
            password: Plain-text password; only the value returned by
                ``self.hash_password`` is passed on to ``User``.

        Returns:
            A ``User`` built from the email, the password hash and a
            creation timestamp.
        """
        # Imported locally so the example runs on its own.
        from datetime import datetime, timezone

        return User(
            email=email,
            password_hash=self.hash_password(password),
            # Deliberately not collected: phone, address, birthday, etc.
            # Timezone-aware UTC; datetime.utcnow() is deprecated and returns
            # a naive value that is easy to misread as local time.
            created_at=datetime.now(timezone.utc),
        )

    def set_default_privacy(self, user):
        """Apply opt-in defaults to ``user`` (Principle 2: Privacy as Default).

        Every sharing/marketing flag starts disabled, so data is shared only
        after the user actively opts in.

        Args:
            user: Object that accepts a ``privacy_settings`` attribute.

        Returns:
            The same ``user`` object, with ``privacy_settings`` assigned.
        """
        user.privacy_settings = PrivacySettings(
            share_analytics=False,   # Default: not shared
            share_profile=False,     # Default: not shared
            email_marketing=False,   # Default: not subscribed
            data_retention_days=90,  # Default: minimal retention
        )
        return user





Data Mapping

Data mapping identifies what personal data is collected, where it flows, how it is processed, and who has access. It is the foundation of any privacy program.




from dataclasses import dataclass


from enum import Enum


from typing import List, Dict




class DataCategory(Enum):
    """Kinds of personal data a flow can carry.

    The member value is the label emitted in generated records.
    """

    # Directly identifying data
    PERSONAL_IDENTIFIABLE = "PII"

    # Categories that typically warrant stricter handling
    FINANCIAL = "FINANCIAL"
    HEALTH = "HEALTH"
    BIOMETRIC = "BIOMETRIC"

    # Data derived from what users do, where they are, and what they send
    BEHAVIORAL = "BEHAVIORAL"
    LOCATION = "LOCATION"
    COMMUNICATION = "COMMUNICATION"




class ProcessingPurpose(Enum):
    """Reasons for which personal data may be processed.

    The member value is the snake_case label emitted in generated records.
    """

    ACCOUNT_MGMT = "account_management"
    ANALYTICS = "analytics"
    MARKETING = "marketing"
    FRAUD_DETECTION = "fraud_detection"
    LEGAL_COMPLIANCE = "legal_compliance"




@dataclass
class DataFlow:
    """One movement of personal data from a source to a destination."""

    # Where the data comes from and where it goes.
    source: str
    destination: str

    # What is being moved, and how it is classified.
    data_elements: List[str]
    categories: List[DataCategory]

    # Why it is processed and on what legal footing.
    purposes: List[ProcessingPurpose]
    legal_basis: str

    # How long it is kept, in days.
    retention_days: int

    # Safeguard and sharing flags.
    encryption: bool
    third_party_sharing: bool




class DataMapper:
    """Registry of data flows from which a record of processing can be produced."""

    def __init__(self):
        # Flows are kept in registration order; reports follow the same order.
        self.data_flows: list = []

    def add_flow(self, flow: "DataFlow"):
        """Register a data flow in the mapping.

        Args:
            flow: The flow to append to ``self.data_flows``.
        """
        self.data_flows.append(flow)

    def generate_roda(self):
        """Generate the Record of Processing Activities (GDPR Art. 30).

        Returns:
            A list with one dict per registered flow, in registration order.
            An empty list is returned when no flows are registered.
        """
        return [self._flow_to_record(flow) for flow in self.data_flows]

    # "RoPA" is the usual abbreviation for Record of Processing Activities;
    # the original (misspelled) method name is kept for existing callers.
    generate_ropa = generate_roda

    @staticmethod
    def _flow_to_record(flow):
        """Translate a single flow into its record-of-processing entry."""
        return {
            'purpose': [p.value for p in flow.purposes],
            'data_categories': [c.value for c in flow.categories],
            # Copied so that editing the report cannot alter the flow itself.
            'data_elements': list(flow.data_elements),
            # NOTE(review): this reports the flow's *source*, which looks like
            # a system name rather than a category of data subjects - confirm.
            'data_subjects': flow.source,
            'recipients': flow.destination,
            # NOTE(review): third-party sharing is not the same thing as a
            # third-country transfer; the flow has no dedicated field - confirm.
            'third_country_transfers': flow.third_party_sharing,
            'retention_period': f"{flow.retention_days} days",
            'security_measures': {
                'encryption_at_rest': flow.encryption,
                # The two values below are fixed, not read from the flow.
                'encryption_in_transit': True,
                'access_controls': 'role_based',
            },
        }





Privacy Impact Assessment (PIA)

A PIA systematically evaluates how a project or system affects individual privacy.




class PrivacyImpactAssessment:
    """Collect privacy risks for a project and summarize them in a report."""

    # Flows carrying more elements than this are flagged for a minimization review.
    MAX_DATA_ELEMENTS = 10

    # Severity labels used in findings, from least to most serious.
    _SEVERITY_LEVELS = ('LOW', 'MEDIUM', 'HIGH')

    def __init__(self, project_name, data_processor):
        """Set up an empty assessment.

        Args:
            project_name: Name reported under ``'project'``.
            data_processor: Object exposing ``data_flows``; only its length
                is read when the report is generated.
        """
        self.project_name = project_name
        self.processor = data_processor
        self.risks = []
        self.mitigations = []

    def assess_data_collection(self, data_flow: "DataFlow"):
        """Assess whether data collection is necessary and proportional.

        Two checks are applied: location data must be backed by an
        account-management or fraud-detection purpose, and a flow may not
        carry more than ``MAX_DATA_ELEMENTS`` elements.

        Args:
            data_flow: The flow to inspect.

        Returns:
            A list of finding dicts (``risk``, ``severity``, ``remediation``)
            for this flow. The same findings are also appended to
            ``self.risks`` so that ``generate_report`` counts them.
        """
        findings = []

        # Necessity check: location data needs a purpose that justifies it.
        if DataCategory.LOCATION in data_flow.categories:
            location_justified = any(
                purpose in (ProcessingPurpose.ACCOUNT_MGMT,
                            ProcessingPurpose.FRAUD_DETECTION)
                for purpose in data_flow.purposes
            )
            if not location_justified:
                findings.append({
                    'risk': 'Unnecessary location data collection',
                    'severity': 'HIGH',
                    'remediation': 'Remove location collection or add justification',
                })

        # Minimization check
        if len(data_flow.data_elements) > self.MAX_DATA_ELEMENTS:
            findings.append({
                'risk': 'Excessive data collection',
                'severity': 'MEDIUM',
                'remediation': 'Review each data element for necessity',
            })

        # Without this the report below would always show zero risks.
        self.risks.extend(findings)
        return findings

    def generate_report(self):
        """Generate structured PIA report.

        Returns:
            A dict with the project name, a UTC ISO-8601 timestamp, counts of
            analyzed flows, risks and mitigations, the residual risk level
            and a recommendation.
        """
        # Imported locally so the example runs on its own.
        from datetime import datetime, timezone

        return {
            'project': self.project_name,
            # Timezone-aware UTC; datetime.utcnow() is deprecated and naive.
            'assessment_date': datetime.now(timezone.utc).isoformat(),
            'data_flows_analyzed': len(self.processor.data_flows),
            'risks_identified': len(self.risks),
            'mitigations_proposed': len(self.mitigations),
            'residual_risk': self._calculate_residual_risk(),
            'recommendation': self._get_recommendation(),
        }

    def _calculate_residual_risk(self):
        """Return 'NONE', 'LOW', 'MEDIUM' or 'HIGH' for the current state.

        Placeholder heuristic: no risks gives 'NONE'; at least as many
        mitigations as risks gives 'LOW'; otherwise the highest severity
        among the recorded risks. A missing or unknown severity is treated
        as the most serious level.
        """
        if not self.risks:
            return 'NONE'
        if len(self.mitigations) >= len(self.risks):
            return 'LOW'

        levels = self._SEVERITY_LEVELS
        worst = len(levels) - 1

        def rank(risk):
            severity = risk.get('severity')
            return levels.index(severity) if severity in levels else worst

        return levels[max(rank(risk) for risk in self.risks)]

    def _get_recommendation(self):
        """Map the residual risk level to a short recommendation label."""
        residual = self._calculate_residual_risk()
        if residual == 'HIGH':
            return 'do_not_proceed_until_mitigated'
        if residual == 'MEDIUM':
            return 'proceed_with_mitigations'
        return 'proceed'





Consent Management

Consent management systems track user consent preferences and enforce them across data processing activities.




class ConsentManager:
    """Record, look up and withdraw per-user consent preferences.

    Records are kept in a storage backend under the key ``consent:<user_id>``;
    each call to ``record_consent`` replaces the previous record for that user.
    """

    # Version label stamped onto every stored consent record.
    CONSENT_VERSION = '2.1'

    # Known consent categories. 'required' categories are always treated as
    # granted by check_consent.
    CONSENT_TYPES = {
        'essential': {
            'required': True,
            'purpose': 'Website functionality',
            'cookies': ['session', 'csrf'],
        },
        'analytics': {
            'required': False,
            'purpose': 'Usage analysis and improvement',
            'cookies': ['_ga', '_gid'],
        },
        'marketing': {
            'required': False,
            'purpose': 'Personalized advertising',
            'cookies': ['_fbp', 'ads_prefs'],
        },
        'functional': {
            'required': False,
            'purpose': 'Enhanced features and preferences',
            'cookies': ['lang_pref', 'theme'],
        },
    }

    def __init__(self, storage_backend):
        """Keep a reference to the backend.

        Args:
            storage_backend: Object providing ``store(key, value)`` and
                ``retrieve(key)``.
        """
        self.storage = storage_backend

    @staticmethod
    def _key(user_id):
        """Build the storage key for a user's consent record."""
        return f"consent:{user_id}"

    @staticmethod
    def _utc_now_iso():
        """Return the current UTC time as a timezone-aware ISO-8601 string."""
        # Imported locally so the example runs on its own.
        # datetime.utcnow() is deprecated and yields a naive timestamp.
        from datetime import datetime, timezone

        return datetime.now(timezone.utc).isoformat()

    def record_consent(self, user_id, consent_preferences, ip_address,
                       user_agent=None):
        """Record granular consent preferences.

        Args:
            user_id: Identifier of the consenting user.
            consent_preferences: Mapping of consent type to granted flag.
            ip_address: Address the consent was given from.
            user_agent: Client user-agent string, if known. It is passed in
                explicitly so this class does not depend on a web
                framework's global request object.

        Returns:
            The consent record that was stored.
        """
        consent_record = {
            'user_id': user_id,
            'timestamp': self._utc_now_iso(),
            # Copied so later withdrawals cannot mutate the caller's dict.
            'preferences': dict(consent_preferences),
            'ip_address': ip_address,
            'user_agent': user_agent,
            'consent_version': self.CONSENT_VERSION,
        }

        self.storage.store(self._key(user_id), consent_record)
        return consent_record

    def check_consent(self, user_id, consent_type):
        """Check if user has granted specific consent.

        Args:
            user_id: Identifier of the user.
            consent_type: Consent category to check.

        Returns:
            True for required categories; otherwise the stored preference,
            or False when there is no record or no entry for the category.
        """
        # Required (essential) consent is always active, including for users
        # who have not stored any preferences yet.
        consent_config = self.CONSENT_TYPES.get(consent_type)
        if consent_config and consent_config['required']:
            return True

        record = self.storage.retrieve(self._key(user_id))
        if not record:
            return False

        return record['preferences'].get(consent_type, False)

    def withdraw_consent(self, user_id, consent_type=None):
        """Withdraw consent (specific type or all).

        Args:
            user_id: Identifier of the user.
            consent_type: Category to withdraw; when omitted, every stored
                preference is set to False.

        Returns:
            The updated record, or None when the user has no stored record
            (there is nothing to withdraw).
        """
        record = self.storage.retrieve(self._key(user_id))
        if not record:
            return None

        withdrawn_at = self._utc_now_iso()
        preferences = record['preferences']

        if consent_type:
            preferences[consent_type] = False
            record['withdrawn_at'] = withdrawn_at
        else:
            for ctype in preferences:
                preferences[ctype] = False
            record['all_withdrawn_at'] = withdrawn_at

        self.storage.store(self._key(user_id), record)
        return record





Conclusion

Privacy engineering requires embedding privacy controls into the design and architecture of systems, not layering them on afterward. Implement data mapping to understand data flows, conduct PIAs for high-risk processing, build consent management that respects user choice, and apply data minimization as a default. Privacy is an engineering discipline — treat it with the same rigor as security or performance engineering.