Skip to main content

Python crawlers to crawl pure girl pictures

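Before running the code, install the third-party packages BeautifulSoup (beautifulsoup4), requests, and lxml (the parser the code passes to BeautifulSoup). The os and time modules are part of the Python standard library and need no installation.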

import os
import time

import requests
from bs4 import BeautifulSoup

def get_html(url, timeout=10):
    """Fetch a page and return its text decoded as GB2312.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the whole crawl.

    Returns:
        The response body as text when the server answers with HTTP 200;
        ``None`` for any other status code or when ``requests`` raises a
        ``RequestException`` (connection error, timeout, ...).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    # The body is always decoded as GB2312, whatever the server declares.
    response.encoding = 'gb2312'
    print('Successfully connected! URL is ' + url)
    return response.text

def get_url_and_name(url):
    """Collect gallery links and gallery names from a listing page.

    Args:
        url: Address of the listing (main) page.

    Returns:
        A two-element list ``[links, names]``. ``links`` holds the gallery
        page URLs and ``names`` the matching gallery titles, in page order.
        Both lists are empty when the listing page could not be fetched.
    """
    html = get_html(url)
    if html is None:
        # Nothing to parse; report "no galleries" instead of crashing.
        return [[], []]

    soup = BeautifulSoup(html, 'lxml')
    links = []
    names = []
    for position, item in enumerate(soup.find_all(class_='t'), start=1):
        # The 1st and 42nd elements with class "t" are deliberately skipped.
        # NOTE(review): presumably these are not gallery entries - confirm
        # against the site's markup.
        if position in (1, 42):
            continue
        link_tag = item.find('a')
        title_box = item.find(class_='title')
        title_tag = title_box.find('a') if title_box is not None else None
        if link_tag is None or title_tag is None:
            continue
        link = link_tag.get('href')
        name = title_tag.get('title')
        # Keep the two lists aligned: add an entry only when both exist.
        if link and name:
            links.append(link)
            names.append(name)
    return [links, names]

def get_pic_url(url):
    """Return the image URLs found on every page of one gallery.

    The number of pages is derived from the pager element (class ``page``)
    on the gallery's first page: the gallery is taken to have
    ``len(pager links) - 1`` pages. Page 1 is ``url`` itself; page N is
    ``url`` with ``.html`` replaced by ``_N.html``.

    Args:
        url: Address of the gallery's first page.

    Returns:
        A list with the ``src`` of every ``<img>`` inside the element with
        class ``content`` on each page, in page order. The list is empty
        when the first page cannot be fetched or has no pager; pages that
        fail to load or lack a ``content`` element are skipped.
    """
    first_html = get_html(url)
    if first_html is None:
        return []
    first_soup = BeautifulSoup(first_html, 'lxml')
    pager = first_soup.find(class_='page')
    if pager is None:
        return []

    page_count = len(pager.find_all('a')) - 1
    addresses = []
    for page in range(1, page_count + 1):
        if page == 1:
            # The first page is already downloaded and parsed; reuse it
            # rather than requesting the same URL a second time.
            page_soup = first_soup
        else:
            page_html = get_html(url.replace('.html', '_' + str(page) + '.html'))
            if page_html is None:
                continue
            page_soup = BeautifulSoup(page_html, 'lxml')
        content = page_soup.find(class_='content')
        if content is None:
            continue
        for img in content.find_all('img'):
            src = img.get('src')
            if src:
                addresses.append(src)
    return addresses
    
def pic_download(url, name, path):
    """Download every image of one gallery into its own folder.

    Images are saved as ``1.jpg``, ``2.jpg``, ... inside ``<path>/<name>``,
    with a two-second pause after each download.

    Args:
        url: List of image URLs belonging to the gallery.
        name: Gallery name; also used as the folder name under ``path``.
        path: Directory under which the gallery folder is created.

    Raises:
        requests.RequestException: If an image request fails.
        OSError: If the folder or an image file cannot be created.
    """
    target_dir = os.path.join(path, name)
    # makedirs with exist_ok lets a re-run reuse a folder left behind by an
    # earlier run and also creates missing parent directories.
    os.makedirs(target_dir, exist_ok=True)
    print('The package being downloaded is named ' + name)
    for index, image_url in enumerate(url, start=1):
        filename = os.path.join(target_dir, str(index) + '.jpg')
        # Download first, then open the file, so a failed request does not
        # leave an empty .jpg on disk.
        img = requests.get(image_url, timeout=10).content
        with open(filename, 'wb') as f:
            f.write(img)
        # Pause between downloads, as the original script does.
        time.sleep(2)
    print(name + 'download completed!')

def main(i, path=r'H:\autoDownLoadPictures\savePicture'):
    """Crawl one listing page and download every gallery linked from it.

    Args:
        i: Number of the listing page to crawl (inserted into the URL).
        path: Directory that receives one sub-folder per gallery. Defaults
            to the location hard-coded in the original script.
    """
    url = 'https://www.keke234.com/gaoqing/list_5_' + str(i) + '.html'
    links, names = get_url_and_name(url)
    # links and names are parallel lists; walk them in lockstep.
    for link, name in zip(links, names):
        pic_download(get_pic_url(link), name, path)

if __name__ == '__main__':
    # range(1, 2) yields only listing page 1; widen it to crawl more pages.
    for i in range(1, 2):
        main(i)

Comments

Popular posts from this blog

span[class~="sr-only"]

  The span[class~="sr-only"] selector selects any span element whose class attribute includes the word sr-only. Create that selector and give it a border property set to 0: span[class~="sr-only"] { border: 0; }

Use Recursion to Create a Range of Numbers

  function   rangeOfNumbers ( startNum ,  endNum ) {    if ( startNum <= endNum )   {      const   arrNumber  =  rangeOfNumbers ( startNum ,  endNum - 1 );      arrNumber . push ( endNum );      return   arrNumber ;   }    else  {      return  [];   }       }; console . log ( rangeOfNumbers ( 6 , 8 ));//[6,7,8] console . log ( rangeOfNumbers ( 3 , 12 )); //[ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ]

About the Little Lemon receipt maker exercise

 My homework and exercise of the lesson "About the Little Lemon receipt maker exercise" of the class "programming with Javascript" on coursera. const menu = [     {         Dish : "Italian pasta" ,         price : 9.55     },     {         Dish : "Rice with veggies" ,         price : 8.65     },     {         Dish : "Chicken with potatoes" ,         price : 15.55     },     {         Dish : "Vegetarian Pizza" ,         price : 6.45     } ]; function receiptMaker ( arr , bool ) {     if ( bool == false )     {         console . log ( "Prices without tax:" );         arr . forEach ( element => {             console . log ( `Dish: ${ element . Dish } Price (incl.tax):$ ${ element . price } ` );                     });     }     else     {         console . log ( "Prices with 20% tax:" );         arr . forEach ( element => {             console . log ( `Dish: ${ element . Dish } Price (inc