- Author:
- NixonDash
- Posted:
- February 21, 2023
- Language:
- Python
- Version:
- 3.2
- Score:
- 0 (after 0 ratings)
Stuff
# models.py
from django.db import models


class Project(models.Model):
    """A project listing captured by the scraper view."""

    title = models.CharField(max_length=200)
    description = models.TextField()
    company_name = models.CharField(max_length=200)
    website = models.URLField()
    # auto_now_add stamps the date on first save; any value supplied by the
    # caller at creation time is overwritten.
    date_posted = models.DateField(auto_now_add=True)


# forms.py
from django import forms


class ScraperForm(forms.Form):
    """Scraper input; the view splits both fields on whitespace."""

    keywords = forms.CharField(label='Keywords', max_length=200)
    websites = forms.CharField(label='Websites', max_length=200)


# views.py
import logging

from django.shortcuts import render
from .models import Project
from .forms import ScraperForm
import requests
from bs4 import BeautifulSoup
from django.utils import timezone

logger = logging.getLogger(__name__)

# Seconds to wait on a remote site; without a timeout a single unresponsive
# host would block the request indefinitely.
REQUEST_TIMEOUT = 10


def _fetch_soup(url):
    """Download *url* and return its parsed HTML, or None if the fetch fails."""
    # NOTE(security): the URL comes straight from user input, so this is a
    # server-side request to an arbitrary host. Restrict the allowed hosts
    # before exposing this view to untrusted users.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException:
        logger.warning("Could not fetch %s", url, exc_info=True)
        return None
    return BeautifulSoup(response.content, 'html.parser')


def _extract_project_fields(container):
    """Return the project fields found inside *container*, or None.

    All three elements (``h2``, ``p`` and ``span.company-name``) must be
    present; a partial match is treated as "not a project".
    """
    heading = container.find('h2')
    paragraph = container.find('p')
    company = container.find('span', class_='company-name')
    if heading is None or paragraph is None or company is None:
        return None
    return {
        'title': heading.text.strip(),
        'description': paragraph.text.strip(),
        'company_name': company.text.strip(),
    }


def _locate_project(text_node):
    """Find the nearest ancestor of *text_node* that holds a full project.

    ``find_all(string=...)`` yields text nodes, not tags, so the surrounding
    element has to be recovered by walking up the tree. Returns a
    ``(container, fields)`` tuple, or None when no ancestor qualifies.
    """
    for ancestor in text_node.parents:
        fields = _extract_project_fields(ancestor)
        if fields is not None:
            return ancestor, fields
    return None


def scrape_projects(request):
    """Scrape the submitted websites for keyword matches and store them.

    GET (or an invalid POST) renders ``index.html`` with the form. A valid
    POST fetches every submitted URL, saves a ``Project`` for each page
    element that mentions a keyword and contains a title, description and
    company name, then renders ``projects.html`` with all stored projects.
    Sites that cannot be fetched are logged and skipped.
    """
    if request.method != 'POST':
        return render(request, 'index.html', {'form': ScraperForm()})

    form = ScraperForm(request.POST)
    if not form.is_valid():
        return render(request, 'index.html', {'form': form})

    keywords = [word.lower() for word in form.cleaned_data['keywords'].split()]

    for url in form.cleaned_data['websites'].split():
        soup = _fetch_soup(url)
        if soup is None:
            continue

        # Several text nodes (or several keywords) can sit inside the same
        # listing; remember saved containers so each is stored only once.
        saved_containers = set()
        for keyword in keywords:
            matches = soup.find_all(
                string=lambda text, needle=keyword: text and needle in text.lower()
            )
            for match in matches:
                located = _locate_project(match)
                if located is None:
                    continue
                container, fields = located
                if id(container) in saved_containers:
                    continue
                saved_containers.add(id(container))
                # date_posted is filled in by auto_now_add on the model.
                Project.objects.create(website=url, **fields)

    projects = Project.objects.all()
    return render(request, 'projects.html', {'projects': projects})
More like this
- Template tag - list punctuation for a list of items by shapiromatron 1 year ago
- JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 1 year ago
- Serializer factory with Django Rest Framework by julio 1 year, 7 months ago
- Image compression before saving the new model / work with JPG, PNG by Schleidens 1 year, 8 months ago
- Help text hyperlinks by sa2812 1 year, 8 months ago
Comments
Please login first before commenting.