Fix YTM scraper

master
Sander Vocke 4 years ago
parent ba566126d5
commit 4b45610648
  1. 4
      client/src/components/windows/manage_links/BatchLinkDialog.tsx
  2. 86
      client/src/lib/integration/youtubemusic/YoutubeMusicWebScraper.tsx
  3. 14
      client/src/lib/query/Query.tsx

@ -5,13 +5,14 @@ import { $enum } from 'ts-enum-util';
import { IntegrationState, useIntegrations } from '../../../lib/integration/useIntegrations';
import { IntegrationWith, ImplIntegratesWith, IntegrationImpl, ResourceType, QueryResponseType, IntegrationUrls } from '../../../api/api';
import { start } from 'repl';
import { QueryLeafBy, QueryLeafOp, QueryNodeOp, queryNot } from '../../../lib/query/Query';
import { QueryFor, QueryLeafBy, QueryLeafOp, QueryNodeOp, queryNot, simplify } from '../../../lib/query/Query';
import { queryAlbums, queryArtists, queryItems, queryTracks } from '../../../lib/backend/queries';
import asyncPool from "tiny-async-pool";
import { getTrack } from '../../../lib/backend/tracks';
import { getAlbum } from '../../../lib/backend/albums';
import { getArtist } from '../../../lib/backend/artists';
import { modifyAlbum, modifyArtist, modifyTrack } from '../../../lib/saveChanges';
import { QueryItemType } from '../query/QueryWindow';
const useStyles = makeStyles((theme: Theme) =>
createStyles({
@ -59,6 +60,7 @@ async function makeTasks(
return;
}
let store = maybeStore as IntegrationWith;
let doForType = async (type: ResourceType) => {
let ids: number[] = ((await queryItems(
type,

@ -6,6 +6,9 @@ import { runInNewContext } from 'vm';
import { TextRotateVertical } from '@material-ui/icons';
import AlbumWindow from '../../../components/windows/album/AlbumWindow';
import { isUndefined } from 'util';
import { keys } from '@material-ui/core/styles/createBreakpoints';
import stringifyList from '../../stringifyList';
import { convertCompilerOptionsFromJson } from 'typescript';
let _ = require('lodash');
enum SearchType {
@ -65,20 +68,38 @@ export function extractInitialData(text: string): any | undefined {
// Return either one that worked.
let result = json1 || json2;
console.log("initial data:", result);
//console.log("initial data:", result);
return result;
}
/**
 * Helper function to recursively find key-value pairs in an Object.
 *
 * Walks `obj` depth-first (in `Object.entries` order) and collects every
 * value for which `match_fn` returns true.
 *
 * @param obj The object or array to search. `null`/`undefined` yields an
 *   empty result instead of throwing.
 * @param match_fn Predicate called for every entry encountered. It receives
 *   the full key path as an array (`keys`), the same path with each key
 *   stringified and joined by '.' (`keys_str`), and the entry's value.
 * @param find_inside_matches When false, the search does not descend into a
 *   value that matched; when true, matched values are searched as well.
 * @param prev_keys Key path leading to `obj`; it is prepended to every path
 *   handed to `match_fn`. Defaults to an empty path.
 * @returns The matched values, in the order they were encountered.
 */
function findRecursive (obj : Object | any[],
                        match_fn : (keys: any[], keys_str: string, value: any) => boolean,
                        find_inside_matches: boolean,
                        prev_keys : any[] = []) : any[] {
  // Single accumulator shared by the whole traversal, so results are not
  // re-copied at every recursion level.
  const found : any[] = [];

  // Object.entries() throws a TypeError on null/undefined; treat those as
  // "nothing to search".
  if (obj == null) {
    return found;
  }

  const visit = (node : Object | any[], path : any[]) : void => {
    for (const [key, value] of Object.entries(node)) {
      const keys : any[] = path.concat([key]);
      const keys_str : string = keys.map((k : any) => String(k)).join('.');

      if (match_fn(keys, keys_str, value)) {
        found.push(value);
        if (!find_inside_matches) {
          // Caller does not want matches nested inside this match.
          continue;
        }
      }

      // typeof null === 'object', so null has to be excluded explicitly.
      if (typeof value === 'object' && value !== null) {
        visit(value, keys);
      }
    }
  };

  visit(obj, prev_keys);
  return found;
}
export function parseItems(initialData: any): {
tracks: IntegrationTrack[],
albums: IntegrationAlbum[],
artists: IntegrationArtist[],
} {
try {
var musicResponsiveListItemRenderers: { type: 'track' | 'album' | 'artist', content: any }[] = [];
console.log('initialData', initialData)
let retval: any = {
tracks: [],
albums: [],
@ -161,33 +182,36 @@ export function parseItems(initialData: any): {
return artist;
}
// Scrape for songs, artists and albums.
_.get(initialData, 'contents.sectionListRenderer.contents', []).forEach((contents: any) => {
_.get(contents, 'musicShelfRenderer.contents', []).forEach((_contents: any) => {
let runs = _.get(_contents, 'musicResponsiveListItemRenderer.flexColumns').map((column: any) => {
return _.get(column, 'musicResponsiveListItemFlexColumnRenderer.text.runs');
}).flat();
switch (_.get(_contents, 'musicResponsiveListItemRenderer.flexColumns[1].musicResponsiveListItemFlexColumnRenderer.text.runs[0].text', '')) {
case "Song": {
retval.tracks.push(parseTrack(_.get(_contents, 'musicResponsiveListItemRenderer'), runs));
break;
}
case "Artist": {
retval.artists.push(parseArtist(_.get(_contents, 'musicResponsiveListItemRenderer'), runs));
break;
}
case "Album":
case "Single": {
retval.albums.push(parseAlbum(_.get(_contents, 'musicResponsiveListItemRenderer'), runs));
break;
}
default: {
break;
}
// Gather all the items.
var musicResponsiveListItemRenderers =
findRecursive(initialData, (keys: any[], keys_str : string, val: any) => {
return keys_str.match(/.*musicResponsiveListItemRenderer$/g) !== null;
}, false);
musicResponsiveListItemRenderers.forEach((renderer: any) => {
let runs = _.get(renderer, 'flexColumns').map((column: any) => {
return _.get(column, 'musicResponsiveListItemFlexColumnRenderer.text.runs');
}).flat();
switch (_.get(renderer, 'flexColumns[1].musicResponsiveListItemFlexColumnRenderer.text.runs[0].text', '')) {
case "Song": {
retval.tracks.push(parseTrack(renderer, runs));
break;
}
})
});
case "Artist": {
retval.artists.push(parseArtist(renderer, runs));
break;
}
case "Album":
case "Single": {
retval.albums.push(parseAlbum(renderer, runs));
break;
}
default: {
break;
}
}
})
return retval;
} catch (e) {

@ -205,6 +205,9 @@ export function removePlaceholders(q: QueryElem | null): QueryElem | null {
}
export function simplify(q: QueryElem | null, queryFor: QueryFor | null): QueryElem | null {
// TODO: null should not be a valid input. Instead we should have
// constant true, constant false values.
if (q && isNodeElem(q)) {
var newOperands: QueryElem[] = [];
q.operands.forEach((o: QueryElem) => {
@ -212,10 +215,17 @@ export function simplify(q: QueryElem | null, queryFor: QueryFor | null): QueryE
if (s !== null) { newOperands.push(s); }
})
if (newOperands.length === 0) { return null; }
if (newOperands.length === 1) { return newOperands[0]; }
// AND/OR optimization
if ((newOperands.length === 1 && q.nodeOp == QueryNodeOp.And) ||
(newOperands.length === 1 && q.nodeOp == QueryNodeOp.Or)) {
return newOperands[0];
}
return { operands: newOperands, nodeOp: q.nodeOp };
}
// This shouldn't be part of simplification.
if (q && isLeafElem(q)) {
if (mapToServerLeafOp(q.leafOp, queryFor) === null ||
mapToServerProperty(q.a, queryFor) === null) {
@ -223,7 +233,7 @@ export function simplify(q: QueryElem | null, queryFor: QueryFor | null): QueryE
}
}
return q;
return q;
}
export function toApiQuery(q: QueryElem, queryFor: QueryFor | null): serverApi.Query {

Loading…
Cancel
Save