parent
9c661d67ee
commit
cd9ee9bbb1
8 changed files with 292 additions and 12 deletions
After Width: | Height: | Size: 696 B |
@ -0,0 +1,203 @@ |
||||
import React from 'react'; |
||||
import Integration, { IntegrationFeature, IntegrationAlbum, IntegrationArtist, IntegrationSong } from '../Integration'; |
||||
import StoreLinkIcon, { ExternalStore } from '../../../components/common/StoreLinkIcon'; |
||||
|
||||
/**
 * Kinds of entity a search can target; accepted as the `type` argument of
 * `YoutubeMusicWebScraper.search`.
 */
enum SearchType {
    Song = 'track',
    Artist = 'artist',
    Album = 'album',
};
||||
|
||||
/**
 * Extracts and parses the search payload embedded in a page's source.
 *
 * The scraper looks for a script fragment of the following shape and captures
 * the quoted string that follows `data:` (the THIS part):
 *
 *     initialData.push({
 *         path: ...,
 *         params: {"query":"something"},
 *         data: "THIS",
 *     });
 *
 * @param text Raw page source to scan.
 * @returns The parsed JSON payload, or `undefined` when no matching
 *          `initialData.push` block is found or its data string is empty.
 * @throws SyntaxError (raised by `JSON.parse`) when the captured string is
 *         not valid JSON after unescaping.
 */
export function extractInitialData(text: string): any | undefined {
    // Capture everything between the outermost quotes on the `data:` line.
    // `.` never crosses a line break, so the greedy group stays on that line.
    // The closing quote may be followed by an optional trailing comma (as in
    // the sketch above) before the end of the line.
    const pattern = /initialData\.push\(\{[\n\r\s]*path:.*[\n\r\s]+params:\s*\{\s*['"]query['"].*[\n\r\s]+data:\s*['"](.*)['"]\s*,?\s*[\n\r]/;
    const match = text.match(pattern);
    const dataLine = match ? match[1] : undefined;
    if (!dataLine) { return undefined; }

    // The captured text has its double quotes backslash-escaped. The first
    // pass turns every `\"` into `"`; a quote that was escaped twice (`\\\"`)
    // is left as `\\"` by that pass, and the second pass rewrites it to `\"`
    // so it stays an escaped quote inside its JSON string.
    const unescaped = dataLine.replace(/\\"/g, '"').replace(/\\\\"/g, '\\"');

    return JSON.parse(unescaped);
}
||||
|
||||
/**
 * Returns the text runs of the flex column at `index` of a
 * `musicResponsiveListItemRenderer`, or an empty array when the renderer,
 * the column or its runs are absent.
 */
function flexColumnRuns(renderer: any, index: number): any[] {
    const column = renderer && renderer.flexColumns && renderer.flexColumns[index];
    const cell = column && column.musicResponsiveListItemFlexColumnRenderer;
    const runs = cell && cell.text && cell.text.runs;
    return Array.isArray(runs) ? runs : [];
}

/**
 * Converts the runs that carry a browse link into `{ url, name }` entries;
 * runs without a browse endpoint (plain text such as separators) are dropped.
 */
function browseLinks(runs: any[]): any[] {
    return runs
        .filter((run: any) => run && run.navigationEndpoint && run.navigationEndpoint.browseEndpoint)
        .map((run: any) => ({
            url: `https://music.youtube.com/browse/${run.navigationEndpoint.browseEndpoint.browseId}`,
            name: run.text,
        }));
}

/**
 * Collects the "Song" entries from a parsed search payload.
 *
 * Walks `contents.sectionListRenderer.contents[*].musicShelfRenderer.contents[*]`
 * and keeps the `musicResponsiveListItemRenderer` items whose second flex
 * column starts with the text "Song". For each of those items the columns are
 * read as: 0 = title, 2 = artist links, 3 = album links.
 *
 * Items that lack a second column, a title or a video id are skipped one by
 * one, so a single unexpected item does not discard the other results.
 *
 * @param initialData Parsed payload, e.g. the result of `extractInitialData`.
 * @returns One entry per recognised song; `artist` / `album` are `undefined`
 *          when the corresponding column has no browse link. Returns an empty
 *          array (after logging the error) when the top-level structure is
 *          missing or anything else throws.
 */
export function parseSongs(initialData: any): IntegrationSong[] {
    try {
        const songs: IntegrationSong[] = [];

        initialData.contents.sectionListRenderer.contents.forEach((section: any) => {
            const shelf = section && section.musicShelfRenderer;
            if (!shelf || !Array.isArray(shelf.contents)) { return; }

            shelf.contents.forEach((entry: any) => {
                const renderer = entry && entry.musicResponsiveListItemRenderer;

                // The first run of the second column names the result kind.
                const kindRun = flexColumnRuns(renderer, 1)[0];
                if (!kindRun || kindRun.text !== "Song") { return; }

                const watchEndpoint = renderer.doubleTapCommand && renderer.doubleTapCommand.watchEndpoint;
                const titleRun = flexColumnRuns(renderer, 0)[0];
                // Without a video id or a title no usable song entry can be built.
                if (!watchEndpoint || !watchEndpoint.videoId || !titleRun) { return; }

                const artists = browseLinks(flexColumnRuns(renderer, 2));
                // Every album entry is attributed to the first listed artist.
                const albums = browseLinks(flexColumnRuns(renderer, 3)).map((album: any) => ({
                    url: album.url,
                    name: album.name,
                    artist: artists[0],
                }));

                songs.push({
                    title: titleRun.text,
                    url: `https://music.youtube.com/watch?v=${watchEndpoint.videoId}`,
                    artist: artists[0],
                    album: albums[0],
                });
            });
        });

        return songs;
    } catch (e) {
        // The caught value is not guaranteed to be an Error instance.
        console.log("Error parsing songs:", e instanceof Error ? e.message : e);
        return [];
    }
}
||||
|
||||
/**
 * Integration that finds songs on YouTube Music by scraping the page returned
 * by the backend's `/integrations/<id>/search` endpoint.
 *
 * Only song search yields results; album and artist search are listed in
 * `getFeatures` but currently resolve to empty lists.
 */
export default class YoutubeMusicWebScraper extends Integration {
    integrationId: number;

    /**
     * @param integrationId Identifier used to build the backend request path.
     */
    constructor(integrationId: number) {
        super(integrationId);
        this.integrationId = integrationId;
    }

    /** Lists the features this integration reports. */
    getFeatures(): IntegrationFeature[] {
        return [
            IntegrationFeature.Test,
            IntegrationFeature.SearchSong,
            IntegrationFeature.SearchAlbum,
            IntegrationFeature.SearchArtist,
        ]
    }

    /** Renders the YouTube Music store icon, forwarding `props` to it. */
    getIcon(props: any) {
        return <StoreLinkIcon whichStore={ExternalStore.YoutubeMusic} {...props} />
    }

    /** Identifies the external store this integration produces links for. */
    providesStoreLink() {
        return ExternalStore.YoutubeMusic;
    }

    /**
     * Requests the search page for `query` from the backend and resolves to
     * its body text. The backend base URL comes from `REACT_APP_BACKEND`
     * (empty when unset, which makes the request relative).
     */
    private async fetchSearchPage(query: string): Promise<string> {
        const response = await fetch(
            (process.env.REACT_APP_BACKEND || "") +
            `/integrations/${this.integrationId}/search?q=${encodeURIComponent(query)}`);

        return response.text();
    }

    /**
     * Checks the integration end to end by searching for a known song and
     * verifying that the first parsed result carries the expected title.
     *
     * @param testParams Unused.
     * @throws Error when no songs are parsed or the first title differs.
     */
    async test(testParams: {}) {
        const page = await this.fetchSearchPage('No One Knows Queens Of The Stone Age');
        const songs = parseSongs(extractInitialData(page));

        if (!Array.isArray(songs) || songs.length === 0 || songs[0].title !== "No One Knows") {
            throw new Error("Test failed; No One Knows was not correctly identified.");
        }
    }

    /**
     * Searches for songs matching `query`.
     *
     * @param query Free-text search string.
     * @param limit NOTE(review): currently not applied; every parsed song is
     *              returned.
     */
    async searchSong(query: string, limit: number): Promise<IntegrationSong[]> {
        const page = await this.fetchSearchPage(query);
        return parseSongs(extractInitialData(page));
    }

    /** Album search is not implemented; always resolves to an empty list. */
    async searchAlbum(query: string, limit: number): Promise<IntegrationAlbum[]> { return []; }

    /** Artist search is not implemented; always resolves to an empty list. */
    async searchArtist(query: string, limit: number): Promise<IntegrationArtist[]> { return []; }

    /**
     * Generic typed search. Not implemented for this integration: it always
     * resolves to an empty list, whatever the arguments.
     */
    async search(query: string, type: SearchType, limit: number):
        Promise<IntegrationSong[] | IntegrationAlbum[] | IntegrationArtist[]> {

        return [];
    }
}
Loading…
Reference in new issue